Two versions of R2R are here. HEAD: master

author: S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
1 files changed, 1793 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi b/.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi
new file mode 100644
index 00000000..fb60af7d
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi
@@ -0,0 +1,1793 @@
+# Private/public helper functions for API functions
+
+from lxml.includes cimport uri
+
+
+cdef void displayNode(xmlNode* c_node, indent) noexcept:
+    """Debugging aid: print the address of 'c_node' and, recursively, the
+    addresses of all nodes below it.
+
+    'indent' is the number of spaces printed in front of the address of
+    'c_node'; each nesting level adds one more.  Any exception raised while
+    printing is deliberately swallowed.
+    """
+    cdef xmlNode* c_child
+    try:
+        # Cast through 'size_t' rather than 'long': a C long is only 32 bits
+        # wide on 64-bit Windows and would truncate the pointer value.
+        print(indent * ' ', <size_t>c_node)
+        c_child = c_node.children
+        while c_child is not NULL:
+            displayNode(c_child, indent + 1)
+            c_child = c_child.next
+    finally:
+        return  # swallow any exceptions
+
+cdef inline bint _isHtmlDocument(_Element element) except -1:
+    """Tell whether the element lives in a document that has the
+    XML_DOC_HTML flag set in its properties.
+
+    Returns False if the element has no C node or the node has no document.
+    """
+    cdef xmlNode* c_node = element._c_node
+    if c_node is NULL or c_node.doc is NULL:
+        return False
+    return (c_node.doc.properties & tree.XML_DOC_HTML) != 0
+
+cdef inline int _assertValidNode(_Element element) except -1:
+    """Assert that the Element proxy still refers to a C node.
+
+    Fails with an AssertionError if 'element._c_node' is NULL.
+    """
+    assert element._c_node is not NULL, "invalid Element proxy at %s" % id(element)
+
+cdef inline int _assertValidDoc(_Document doc) except -1:
+    """Assert that the Document proxy still refers to a C document.
+
+    Fails with an AssertionError if 'doc._c_doc' is NULL.
+    """
+    assert doc._c_doc is not NULL, "invalid Document proxy at %s" % id(doc)
+
+cdef _Document _documentOrRaise(object input):
+    """Return the _Document behind a _Document, _ElementTree or _Element.
+
+    Raises TypeError for any other kind of input and ValueError if the input
+    has no document attached.  The document is checked with
+    _assertValidDoc() before it is returned.
+
+    Should be used in all API functions for consistency.
+    """
+    cdef _Document doc = None
+    if isinstance(input, _ElementTree):
+        # a tree without a context node has no document => stays None
+        if (<_ElementTree>input)._context_node is not None:
+            doc = (<_ElementTree>input)._context_node._doc
+    elif isinstance(input, _Element):
+        doc = (<_Element>input)._doc
+    elif isinstance(input, _Document):
+        doc = <_Document>input
+    else:
+        raise TypeError(f"Invalid input object: {python._fqtypename(input).decode('utf8')}")
+    if doc is None:
+        raise ValueError(f"Input object has no document: {python._fqtypename(input).decode('utf8')}")
+    _assertValidDoc(doc)
+    return doc
+
+cdef _Element _rootNodeOrRaise(object input):
+    """Return the root Element of a _Document, _ElementTree or _Element.
+
+    Raises TypeError for any other kind of input and ValueError if no
+    element node could be determined.  The element is checked with
+    _assertValidNode() before it is returned.
+
+    Should be used in all API functions for consistency.
+    """
+    cdef _Element root
+    if isinstance(input, _ElementTree):
+        root = (<_ElementTree>input)._context_node
+    elif isinstance(input, _Element):
+        root = <_Element>input
+    elif isinstance(input, _Document):
+        root = (<_Document>input).getroot()
+    else:
+        raise TypeError(f"Invalid input object: {python._fqtypename(input).decode('utf8')}")
+    if root is not None and root._c_node is not NULL \
+            and root._c_node.type == tree.XML_ELEMENT_NODE:
+        _assertValidNode(root)
+        return root
+    raise ValueError(f"Input object is not an XML element: {python._fqtypename(input).decode('utf8')}")
+
+cdef bint _isAncestorOrSame(xmlNode* c_ancestor, xmlNode* c_node) noexcept:
+    """Follow the parent links starting at 'c_node' and report whether
+    'c_ancestor' is the node itself or one of its ancestors.
+    """
+    cdef xmlNode* c_current = c_node
+    while c_current is not NULL and c_current is not c_ancestor:
+        c_current = c_current.parent
+    # we either stopped on the ancestor or ran off the top of the tree
+    return c_current is not NULL
+
+cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc,
+                           _BaseParser parser, text, tail, attrib, nsmap,
+                           dict extra_attrs):
+    """Create a new element and initialize text content, namespaces and
+    attributes.
+
+    This helper function will reuse as much of the existing document as
+    possible:
+
+    If 'parser' is None, the parser will be inherited from 'doc' or the
+    default parser will be used.
+
+    If 'doc' is None, 'c_doc' is used to create a new _Document and the new
+    element is made its root node.
+
+    If 'c_doc' is also NULL, a new xmlDoc will be created.
+
+    Raises MemoryError if _createElement() returns NULL.  Any exception
+    raised while initialising the new node is re-raised after cleanup.
+    """
+    cdef xmlNode* c_node
+    if doc is not None:
+        # an existing _Document takes precedence over the 'c_doc' argument
+        c_doc = doc._c_doc
+    ns_utf, name_utf = _getNsTag(tag)
+    # the tag check runs before a new C document gets allocated
+    if parser is not None and parser._for_html:
+        _htmlTagValidOrRaise(name_utf)
+        if c_doc is NULL:
+            c_doc = _newHTMLDoc()
+    else:
+        _tagValidOrRaise(name_utf)
+        if c_doc is NULL:
+            c_doc = _newXMLDoc()
+    c_node = _createElement(c_doc, name_utf)
+    if c_node is NULL:
+        # no _Document wraps c_doc here, so nobody else would free it
+        if doc is None and c_doc is not NULL:
+            tree.xmlFreeDoc(c_doc)
+        raise MemoryError()
+    try:
+        if doc is None:
+            # make the new node the root of c_doc and wrap c_doc in a proxy
+            tree.xmlDocSetRootElement(c_doc, c_node)
+            doc = _documentFactory(c_doc, parser)
+        if text is not None:
+            _setNodeText(c_node, text)
+        if tail is not None:
+            _setTailText(c_node, tail)
+        # add namespaces to node if necessary
+        _setNodeNamespaces(c_node, doc, ns_utf, nsmap)
+        _initNodeAttributes(c_node, doc, attrib, extra_attrs)
+        return _elementFactory(doc, c_node)
+    except:
+        # free allocated c_node/c_doc unless Python does it for us
+        if c_node.doc is not c_doc:
+            # node not yet in document => will not be freed by document
+            if tail is not None:
+                _removeText(c_node.next) # tail
+            tree.xmlFreeNode(c_node)
+        if doc is None:
+            # c_doc will not be freed by doc
+            tree.xmlFreeDoc(c_doc)
+        raise
+
+cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf,
+                         _BaseParser parser, attrib, nsmap, dict extra_attrs) except -1:
+    """Initialise a new Element object.
+
+    This is used when users instantiate a Python Element subclass
+    directly, without it being mapped to an existing XML node.
+
+    A new C document (HTML or XML, depending on 'is_html') is created with
+    a new node named 'name_utf' as its root.  Namespaces and attributes are
+    set up on that node before 'element' is registered as its proxy and
+    'element._init()' is called.
+
+    Raises MemoryError if _createElement() returns NULL.
+    """
+    cdef xmlDoc* c_doc
+    cdef xmlNode* c_node
+    cdef _Document doc
+    # the tag check runs before the C document gets allocated
+    if is_html:
+        _htmlTagValidOrRaise(name_utf)
+        c_doc = _newHTMLDoc()
+    else:
+        _tagValidOrRaise(name_utf)
+        c_doc = _newXMLDoc()
+    c_node = _createElement(c_doc, name_utf)
+    if c_node is NULL:
+        if c_doc is not NULL:
+            tree.xmlFreeDoc(c_doc)
+        raise MemoryError()
+    tree.xmlDocSetRootElement(c_doc, c_node)
+    # NOTE(review): unlike _makeElement(), there is no explicit cleanup from
+    # here on; presumably 'doc' frees c_doc once it wraps it -- confirm.
+    doc = _documentFactory(c_doc, parser)
+    # add namespaces to node if necessary
+    _setNodeNamespaces(c_node, doc, ns_utf, nsmap)
+    _initNodeAttributes(c_node, doc, attrib, extra_attrs)
+    _registerProxy(element, doc, c_node)
+    element._init()
+    return 0
+
+cdef _Element _makeSubElement(_Element parent, tag, text, tail,
+                              attrib, nsmap, dict extra_attrs):
+    """Create a new child element and initialize text content, namespaces and
+    attributes.
+
+    The new node is created in the document of 'parent' and added to
+    'parent' via xmlAddChild().  Returns None if 'parent' is None or has no
+    document.  Raises MemoryError if _createElement() returns NULL.  If
+    initialising the new node fails, it is removed again and the exception
+    is re-raised.
+    """
+    cdef xmlNode* c_node
+    cdef xmlDoc* c_doc
+    if parent is None or parent._doc is None:
+        return None
+    _assertValidNode(parent)
+    ns_utf, name_utf = _getNsTag(tag)
+    c_doc = parent._doc._c_doc
+
+    # pick the tag check based on the parser of the parent's document
+    if parent._doc._parser is not None and parent._doc._parser._for_html:
+        _htmlTagValidOrRaise(name_utf)
+    else:
+        _tagValidOrRaise(name_utf)
+
+    c_node = _createElement(c_doc, name_utf)
+    if c_node is NULL:
+        raise MemoryError()
+    tree.xmlAddChild(parent._c_node, c_node)
+
+    try:
+        if text is not None:
+            _setNodeText(c_node, text)
+        if tail is not None:
+            _setTailText(c_node, tail)
+
+        # add namespaces to node if necessary
+        _setNodeNamespaces(c_node, parent._doc, ns_utf, nsmap)
+        _initNodeAttributes(c_node, parent._doc, attrib, extra_attrs)
+        return _elementFactory(parent._doc, c_node)
+    except:
+        # make sure we clean up in case of an error
+        _removeNode(parent._doc, c_node)
+        raise
+
+
+cdef int _setNodeNamespaces(xmlNode* c_node, _Document doc,
+                            object node_ns_utf, object nsmap) except -1:
+    """Lookup current namespace prefixes, then set namespace structure for
+    node (if 'node_ns_utf' was provided) and register new ns-prefix mappings.
+
+    'node_ns_utf' should only be passed for a newly created node.
+
+    'nsmap' maps prefixes (or None) to namespace URIs; its entries are
+    processed in the order given by _iter_nsmap().
+    """
+    cdef xmlNs* c_ns
+    # NOTE(review): 'nsdefs' is declared but never used in this function.
+    cdef list nsdefs
+
+    if nsmap:
+        for prefix, href in _iter_nsmap(nsmap):
+            href_utf = _utf8(href)
+            _uriValidOrRaise(href_utf)
+            c_href = _xcstr(href_utf)
+            if prefix is not None:
+                prefix_utf = _utf8(prefix)
+                _prefixValidOrRaise(prefix_utf)
+                c_prefix = _xcstr(prefix_utf)
+            else:
+                # a None prefix is passed on to libxml2 as a NULL prefix
+                c_prefix = <const_xmlChar*>NULL
+            # add namespace with prefix if it is not already known
+            c_ns = tree.xmlSearchNs(doc._c_doc, c_node, c_prefix)
+            if c_ns is NULL or \
+                    c_ns.href is NULL or \
+                    tree.xmlStrcmp(c_ns.href, c_href) != 0:
+                c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
+            if href_utf == node_ns_utf:
+                # this mapping covers the node's own namespace => use it and
+                # skip the fallback below
+                tree.xmlSetNs(c_node, c_ns)
+                node_ns_utf = None
+
+    if node_ns_utf is not None:
+        # no entry of 'nsmap' matched the node's namespace
+        _uriValidOrRaise(node_ns_utf)
+        doc._setNodeNs(c_node, _xcstr(node_ns_utf))
+    return 0
+
+
+cdef dict _build_nsmap(xmlNode* c_node):
+    """
+    Build the prefix->URI mapping of all namespaces that are declared on
+    this element node or on one of its element ancestors.
+
+    Declarations closer to the start node win over declarations further up.
+    """
+    cdef xmlNs* c_nsdef
+    cdef dict prefix_to_uri = {}
+    while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE:
+        c_nsdef = c_node.nsDef
+        while c_nsdef is not NULL:
+            if c_nsdef.prefix is not NULL or c_nsdef.href is not NULL:
+                key = funicodeOrNone(c_nsdef.prefix)
+                # first declaration seen on the way up stays in place
+                if key not in prefix_to_uri:
+                    prefix_to_uri[key] = funicodeOrNone(c_nsdef.href)
+            c_nsdef = c_nsdef.next
+        c_node = c_node.parent
+    return prefix_to_uri
+
+
+cdef _iter_nsmap(nsmap):
+    """
+    Return the (prefix, URI) items of an nsmap mapping in a reproducible
+    order.  An existing order is kept; otherwise the items get sorted.
+
+    In contrast to _iter_attrib(), this has to deal with a None key, which
+    does not sort with strings in Py3.x.
+    """
+    # Dicts (insertion-ordered in Py3.6+), mappings with at most one item
+    # and OrderedDicts are used in the order they provide.
+    if isinstance(nsmap, dict) or len(nsmap) <= 1 or isinstance(nsmap, OrderedDict):
+        return nsmap.items()
+    if None not in nsmap:
+        return sorted(nsmap.items())
+
+    # The default namespace goes last so that libxml2 prefers a prefix if
+    # the namespace is defined redundantly on the same element.  This lets
+    # users work around the problem that default namespace attributes on
+    # non-default namespaced elements serialise without prefix (i.e. into
+    # the non-default namespace).
+    default_ns = nsmap[None]
+    prefixed = sorted(
+        [(prefix, uri) for prefix, uri in nsmap.items() if prefix is not None])
+    prefixed.append((None, default_ns))
+    return prefixed
+
+
+cdef _iter_attrib(attrib):
+    """
+    Return the (name, value) items of an attrib mapping in a reproducible
+    order.  An existing order is kept; otherwise the items get sorted.
+    """
+    items = attrib.items()
+    if isinstance(attrib, (dict, _Attrib, OrderedDict)):
+        # dicts are insertion-ordered in Py3.6+ => keep the user provided order.
+        return items
+    # any other mapping is assumed to be unordered
+    return sorted(items)
+
+
+cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, dict extra):
+    """Set the initial attributes of an element node.
+
+    Attributes from 'extra' are added first, followed by those from
+    'attrib' in the order given by _iter_attrib().  A name that was already
+    added is skipped when it shows up again.
+
+    Raises TypeError if 'attrib' is neither None nor has an 'items'
+    attribute.
+    """
+    cdef bint is_html
+    cdef set seen
+    if attrib is not None and not hasattr(attrib, 'items'):
+        raise TypeError(f"Invalid attribute dictionary: {python._fqtypename(attrib).decode('utf8')}")
+    if not (attrib or extra):
+        return  # nothing to do
+    is_html = doc._parser._for_html
+    seen = set()
+    if extra:
+        for extra_name, extra_value in extra.items():
+            _addAttributeToNode(c_node, doc, is_html, extra_name, extra_value, seen)
+    if attrib:
+        for attr_name, attr_value in _iter_attrib(attrib):
+            _addAttributeToNode(c_node, doc, is_html, attr_name, attr_value, seen)
+
+
+cdef int _addAttributeToNode(xmlNode* c_node, _Document doc, bint is_html,
+                             name, value, set seen_tags) except -1:
+    """Add one attribute to 'c_node' unless its (namespace, name) pair is
+    already contained in 'seen_tags'.
+
+    The pair is recorded in 'seen_tags'.  The attribute name is only
+    validated when 'is_html' is false.
+    """
+    cdef xmlNs* c_ns
+    tag = _getNsTag(name)
+    ns_utf, name_utf = tag
+    if tag in seen_tags:
+        return 0
+    seen_tags.add(tag)
+    if not is_html:
+        _attributeValidOrRaise(name_utf)
+    value_utf = _utf8(value)
+    if ns_utf is not None:
+        _uriValidOrRaise(ns_utf)
+        c_ns = doc._findOrBuildNodeNs(c_node, _xcstr(ns_utf), NULL, 1)
+        tree.xmlNewNsProp(c_node, c_ns,
+                          _xcstr(name_utf), _xcstr(value_utf))
+    else:
+        tree.xmlNewProp(c_node, _xcstr(name_utf), _xcstr(value_utf))
+    return 0
+
+
+# Pairs a namespace declaration with the node it was collected from
+# (see _collectNsDefs()).
+ctypedef struct _ns_node_ref:
+    xmlNs* ns       # the namespace declaration
+    xmlNode* node   # the node whose 'nsDef' list contained 'ns'
+
+
+cdef int _collectNsDefs(xmlNode* c_element, _ns_node_ref **_c_ns_list,
+                        size_t *_c_ns_list_len, size_t *_c_ns_list_size) except -1:
+    """Append all namespace declarations of 'c_element' to a growable array.
+
+    The array, its used length and its allocated size are passed by pointer
+    and updated in place on success.  The array starts with room for 20
+    entries and doubles in size whenever it is full.
+
+    Raises MemoryError if the array cannot be (re-)allocated.  In that
+    case, a previously allocated array is freed and the caller's array
+    pointer is set to NULL.
+    """
+    # work on local copies and write them back at the end
+    c_ns_list = _c_ns_list[0]
+    cdef size_t c_ns_list_len = _c_ns_list_len[0]
+    cdef size_t c_ns_list_size = _c_ns_list_size[0]
+
+    c_nsdef = c_element.nsDef
+    while c_nsdef is not NULL:
+        if c_ns_list_len >= c_ns_list_size:
+            # array is full (or not allocated yet) => grow it
+            if c_ns_list is NULL:
+                c_ns_list_size = 20
+            else:
+                c_ns_list_size *= 2
+            c_nsref_ptr = <_ns_node_ref*> python.lxml_realloc(
+                c_ns_list, c_ns_list_size, sizeof(_ns_node_ref))
+            if c_nsref_ptr is NULL:
+                if c_ns_list is not NULL:
+                    python.lxml_free(c_ns_list)
+                    _c_ns_list[0] = NULL
+                raise MemoryError()
+            c_ns_list = c_nsref_ptr
+
+        c_ns_list[c_ns_list_len] = _ns_node_ref(c_nsdef, c_element)
+        c_ns_list_len += 1
+        c_nsdef = c_nsdef.next
+
+    _c_ns_list_size[0] = c_ns_list_size
+    _c_ns_list_len[0] = c_ns_list_len
+    _c_ns_list[0] = c_ns_list
+
+
+cdef int _removeUnusedNamespaceDeclarations(xmlNode* c_element, set prefixes_to_keep) except -1:
+    """Remove any namespace declarations from a subtree that are not used by
+    any of its elements (or attributes).
+
+    If a 'prefixes_to_keep' is provided, it must be a set of prefixes.
+    Any corresponding namespace mappings will not be removed as part of the cleanup.
+
+    Works in two phases: first, all declarations are collected into a list
+    from which those that a node refers to are dropped again; then the
+    declarations that remain in the list are unlinked and freed.
+    """
+    cdef xmlNode* c_node
+    cdef _ns_node_ref* c_ns_list = NULL
+    cdef size_t c_ns_list_size = 0
+    cdef size_t c_ns_list_len = 0
+    cdef size_t i
+
+    if c_element.parent and c_element.parent.type == tree.XML_DOCUMENT_NODE:
+        # include declarations on the document node
+        _collectNsDefs(c_element.parent, &c_ns_list, &c_ns_list_len, &c_ns_list_size)
+
+    # NOTE(review): presumably this macro pair visits 'c_element' and the
+    # nodes below it, rebinding 'c_element' on each step -- confirm.
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_element, c_element, 1)
+    # collect all new namespace declarations into the ns list
+    if c_element.nsDef:
+        _collectNsDefs(c_element, &c_ns_list, &c_ns_list_len, &c_ns_list_size)
+
+    # remove all namespace declarations from the list that are referenced
+    if c_ns_list_len and c_element.type == tree.XML_ELEMENT_NODE:
+        c_node = c_element
+        while c_node and c_ns_list_len:
+            if c_node.ns:
+                for i in range(c_ns_list_len):
+                    if c_node.ns is c_ns_list[i].ns:
+                        # drop entry i by moving the last entry into its slot
+                        c_ns_list_len -= 1
+                        c_ns_list[i] = c_ns_list[c_ns_list_len]
+                        #c_ns_list[c_ns_list_len] = _ns_node_ref(NULL, NULL)
+                        break
+            if c_node is c_element:
+                # continue with attributes
+                c_node = <xmlNode*>c_element.properties
+            else:
+                c_node = c_node.next
+    tree.END_FOR_EACH_ELEMENT_FROM(c_element)
+
+    if c_ns_list is NULL:
+        return 0
+
+    # free all namespace declarations that remained in the list,
+    # except for those we should keep explicitly
+    cdef xmlNs* c_nsdef
+    for i in range(c_ns_list_len):
+        if prefixes_to_keep is not None:
+            if c_ns_list[i].ns.prefix and c_ns_list[i].ns.prefix in prefixes_to_keep:
+                continue
+        c_node = c_ns_list[i].node
+        c_nsdef = c_node.nsDef
+        if c_nsdef is c_ns_list[i].ns:
+            # declaration is the head of the node's nsDef list
+            c_node.nsDef = c_node.nsDef.next
+        else:
+            # find the predecessor in the nsDef list and bypass the entry
+            while c_nsdef.next is not c_ns_list[i].ns:
+                c_nsdef = c_nsdef.next
+            c_nsdef.next = c_nsdef.next.next
+        tree.xmlFreeNs(c_ns_list[i].ns)
+
+    if c_ns_list is not NULL:
+        python.lxml_free(c_ns_list)
+    return 0
+
+cdef xmlNs* _searchNsByHref(xmlNode* c_node, const_xmlChar* c_href, bint is_attribute) noexcept:
+    """Search a namespace declaration that covers a node (element or
+    attribute).
+
+    For attributes, try to find a prefixed namespace declaration
+    instead of the default namespaces.  This helps in supporting
+    round-trips for attributes on elements with a different namespace.
+
+    Returns NULL if 'c_href' or 'c_node' is NULL, if 'c_node' is an entity
+    reference node, or if no suitable declaration was found.
+    """
+    cdef xmlNs* c_ns
+    cdef xmlNs* c_default_ns = NULL
+    cdef xmlNode* c_element
+    if c_href is NULL or c_node is NULL or c_node.type == tree.XML_ENTITY_REF_NODE:
+        return NULL
+    if tree.xmlStrcmp(c_href, tree.XML_XML_NAMESPACE) == 0:
+        # no special cases here, let libxml2 handle this
+        return tree.xmlSearchNsByHref(c_node.doc, c_node, c_href)
+    if c_node.type == tree.XML_ATTRIBUTE_NODE:
+        is_attribute = 1
+    # start the search at the nearest element node
+    while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
+        c_node = c_node.parent
+    c_element = c_node
+    # walk up the ancestors and inspect their declarations
+    while c_node is not NULL:
+        if c_node.type == tree.XML_ELEMENT_NODE:
+            c_ns = c_node.nsDef
+            while c_ns is not NULL:
+                if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0:
+                    if c_ns.prefix is NULL and is_attribute:
+                        # for attributes, continue searching a named
+                        # prefix, but keep the first default namespace
+                        # declaration that we found
+                        if c_default_ns is NULL:
+                            c_default_ns = c_ns
+                    elif tree.xmlSearchNs(
+                        c_element.doc, c_element, c_ns.prefix) is c_ns:
+                        # start node is in namespace scope => found!
+                        return c_ns
+                c_ns = c_ns.next
+            if c_node is not c_element and c_node.ns is not NULL:
+                # optimise: the node may have the namespace itself
+                c_ns = c_node.ns
+                if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0:
+                    if c_ns.prefix is NULL and is_attribute:
+                        # for attributes, continue searching a named
+                        # prefix, but keep the first default namespace
+                        # declaration that we found
+                        if c_default_ns is NULL:
+                            c_default_ns = c_ns
+                    elif tree.xmlSearchNs(
+                        c_element.doc, c_element, c_ns.prefix) is c_ns:
+                        # start node is in namespace scope => found!
+                        return c_ns
+        c_node = c_node.parent
+    # nothing found => use a matching default namespace or fail
+    if c_default_ns is not NULL:
+        # only use it if it is the default namespace seen by the start element
+        if tree.xmlSearchNs(c_element.doc, c_element, NULL) is c_default_ns:
+            return c_default_ns
+    return NULL
+
+cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1:
+    """Unlink 'c_node' from its tree and put its children in its place.
+
+    A node without children is simply unlinked.  Otherwise, the children
+    are re-parented to the parent of 'c_node' and spliced into the sibling
+    chain at the position of 'c_node'.
+    """
+    # NOTE: this does not deallocate the node, just unlink it!
+    cdef xmlNode* c_parent
+    cdef xmlNode* c_child
+    if c_node.children is NULL:
+        tree.xmlUnlinkNode(c_node)
+        return 0
+
+    c_parent = c_node.parent
+    # fix parent links of children
+    c_child = c_node.children
+    while c_child is not NULL:
+        c_child.parent = c_parent
+        c_child = c_child.next
+
+    # fix namespace references of children if their parent's namespace
+    # declarations get lost
+    if c_node.nsDef is not NULL:
+        c_child = c_node.children
+        while c_child is not NULL:
+            moveNodeToDocument(doc, doc._c_doc, c_child)
+            c_child = c_child.next
+
+    # fix sibling links to/from child slice
+    if c_node.prev is NULL:
+        # c_node had no previous sibling => its first child leads the list
+        c_parent.children = c_node.children
+    else:
+        c_node.prev.next = c_node.children
+        c_node.children.prev = c_node.prev
+    if c_node.next is NULL:
+        # c_node had no next sibling => its last child ends the list
+        c_parent.last = c_node.last
+    else:
+        c_node.next.prev = c_node.last
+        c_node.last.next = c_node.next
+
+    # unlink c_node
+    c_node.children = c_node.last = NULL
+    c_node.parent = c_node.next = c_node.prev = NULL
+    return 0
+
+cdef unicode _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node):
+    """Read the value of the attribute 'c_attrib_node' from 'c_element' as
+    a unicode string.
+
+    The C string returned by xmlGetNsProp() is always freed again.
+    """
+    c_href = _getNs(<xmlNode*>c_attrib_node)
+    c_value = tree.xmlGetNsProp(c_element, c_attrib_node.name, c_href)
+    try:
+        return funicode(c_value)
+    finally:
+        tree.xmlFree(c_value)
+
+cdef unicode _attributeValueFromNsName(xmlNode* c_element,
+                                       const_xmlChar* c_href, const_xmlChar* c_name):
+    """Look up an attribute of 'c_element' by namespace URI and name.
+
+    Returns its value as a unicode string, or None if xmlGetNsProp() finds
+    nothing.
+    """
+    c_value = tree.xmlGetNsProp(c_element, c_name, c_href)
+    if c_value is not NULL:
+        try:
+            return funicode(c_value)
+        finally:
+            tree.xmlFree(c_value)
+    return None
+
+cdef object _getNodeAttributeValue(xmlNode* c_node, key, default):
+    """Look up the attribute 'key' on 'c_node'.
+
+    Returns its value as a unicode string, or 'default' if xmlGetNsProp()
+    finds nothing.
+    """
+    cdef const_xmlChar* c_href = NULL
+    ns, tag = _getNsTag(key)
+    if ns is not None:
+        c_href = _xcstr(ns)
+    c_value = tree.xmlGetNsProp(c_node, _xcstr(tag), c_href)
+    if c_value is NULL:
+        # XXX free namespace that is not in use..?
+        return default
+    try:
+        return funicode(c_value)
+    finally:
+        tree.xmlFree(c_value)
+
+cdef inline object _getAttributeValue(_Element element, key, default):
+    """Look up the attribute 'key' on the C node of 'element'.
+
+    Delegates to _getNodeAttributeValue().
+    """
+    return _getNodeAttributeValue(element._c_node, key, default)
+
+cdef int _setAttributeValue(_Element element, key, value) except -1:
+    """Set the attribute 'key' of 'element' to 'value'.
+
+    The attribute name is only validated for non-HTML documents.  A QName
+    value is resolved against the element with _resolveQNameText().  In
+    HTML documents, a None value is passed on to libxml2 as a NULL value.
+    """
+    cdef const_xmlChar* c_value = NULL
+    cdef xmlNs* c_ns = NULL
+    ns, tag = _getNsTag(key)
+    is_html = element._doc._parser._for_html
+    if not is_html:
+        _attributeValidOrRaise(tag)
+    c_tag = _xcstr(tag)
+    if value is not None or not is_html:
+        if isinstance(value, QName):
+            value_utf = _resolveQNameText(element, value)
+        else:
+            value_utf = _utf8(value)
+        # 'value_utf' stays referenced while 'c_value' points into it
+        c_value = _xcstr(value_utf)
+    if ns is not None:
+        c_ns = element._doc._findOrBuildNodeNs(element._c_node, _xcstr(ns), NULL, 1)
+    tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value)
+    return 0
+
+cdef int _delAttribute(_Element element, key) except -1:
+    """Remove the attribute 'key' from 'element'.
+
+    Raises KeyError if the element has no such attribute.
+    """
+    cdef const_xmlChar* c_href = NULL
+    ns, tag = _getNsTag(key)
+    if ns is not None:
+        c_href = _xcstr(ns)
+    if _delAttributeFromNsName(element._c_node, c_href, _xcstr(tag)) != 0:
+        raise KeyError, key
+    return 0
+
+cdef int _delAttributeFromNsName(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name) noexcept:
+    """Remove the attribute with the given namespace URI and name from
+    'c_node'.
+
+    Returns 0 if the attribute was removed and -1 if xmlHasNsProp() did not
+    find it.
+    """
+    c_attr = tree.xmlHasNsProp(c_node, c_name, c_href)
+    if c_attr is not NULL:
+        tree.xmlRemoveProp(c_attr)
+        return 0
+    # XXX free namespace that is not in use..?
+    return -1
+
+cdef list _collectAttributes(xmlNode* c_node, int collecttype):
+    """Return a list built from all attributes of a node.
+
+    'collecttype' selects what gets collected per attribute: the name (1),
+    the value (2), or a (name, value) tuple for any other value.
+    """
+    cdef list collected = []
+    c_attr = c_node.properties
+    while c_attr is not NULL:
+        # skip entries of the property list that are not attribute nodes
+        if c_attr.type == tree.XML_ATTRIBUTE_NODE:
+            if collecttype == 1:
+                item = _namespacedName(<xmlNode*>c_attr)
+            elif collecttype == 2:
+                item = _attributeValue(c_node, c_attr)
+            else:
+                item = (_namespacedName(<xmlNode*>c_attr),
+                        _attributeValue(c_node, c_attr))
+            collected.append(item)
+        c_attr = c_attr.next
+    return collected
+
+# Matches an XML declaration at the start of a string that carries an
+# 'encoding' pseudo-attribute.  Group 1 is the declaration up to (but not
+# including) the whitespace in front of 'encoding', group 2 is the closing
+# '?>' including leading whitespace, or empty if it does not follow directly.
+cdef object __RE_XML_ENCODING = re.compile(
+    r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U)
+
+# bound methods of the compiled pattern, looked up once at module level
+cdef object __REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub
+cdef object __HAS_XML_ENCODING = __RE_XML_ENCODING.match
+
+cdef object _stripEncodingDeclaration(object xml_string):
+    """Return 'xml_string' with the encoding pseudo-attribute removed from
+    a leading XML declaration.
+    """
+    # this is a hack to remove the XML encoding declaration from unicode:
+    # the match is replaced by its groups 1 and 2, dropping what is between
+    return __REPLACE_XML_ENCODING(r'\g<1>\g<2>', xml_string)
+
+cdef bint _hasEncodingDeclaration(object xml_string) except -1:
+    """Tell whether 'xml_string' starts with an XML declaration that has an
+    encoding pseudo-attribute.
+    """
+    # check if a (unicode) string has an XML encoding declaration
+    return __HAS_XML_ENCODING(xml_string) is not None
+
+cdef inline bint _hasText(xmlNode* c_node) noexcept:
+    """Tell whether _textNodeOrSkip() finds a text node at the start of the
+    children of 'c_node'.  False for a NULL node.
+    """
+    return c_node is not NULL and _textNodeOrSkip(c_node.children) is not NULL
+
+cdef inline bint _hasTail(xmlNode* c_node) noexcept:
+    """Tell whether _textNodeOrSkip() finds a text node right after
+    'c_node'.  False for a NULL node.
+    """
+    return c_node is not NULL and _textNodeOrSkip(c_node.next) is not NULL
+
+cdef inline bint _hasNonWhitespaceTail(xmlNode* c_node) except -1:
+    """Tell whether the tail text of 'c_node' contains anything but
+    whitespace.  Delegates to _hasNonWhitespaceText() with tail=True.
+    """
+    return _hasNonWhitespaceText(c_node, tail=True)
+
+cdef bint _hasNonWhitespaceText(xmlNode* c_node, bint tail=False) except -1:
+    """Tell whether the text of 'c_node' contains anything but whitespace.
+
+    Looks at the text nodes following 'c_node' if 'tail' is true, otherwise
+    at those at the start of its children.  False for a NULL node.
+    """
+    cdef xmlNode* c_text_node
+    if c_node is NULL:
+        return False
+    c_text_node = _textNodeOrSkip(c_node.next if tail else c_node.children)
+    while c_text_node is not NULL:
+        # empty text nodes cannot contribute non-whitespace content
+        if c_text_node.content[0] != c'\0' and not _collectText(c_text_node).isspace():
+            return True
+        c_text_node = _textNodeOrSkip(c_text_node.next)
+    return False
+
+cdef unicode _collectText(xmlNode* c_node):
+    """Collect all text nodes and return them as a unicode string.
+
+    Start collecting at c_node.
+
+    If there was no text to collect, return None
+    """
+    cdef Py_ssize_t scount
+    cdef xmlChar* c_text
+    cdef xmlNode* c_node_cur
+    # check for multiple text nodes
+    # scount: number of text nodes found
+    # c_text: content of the last non-empty text node, NULL if all are empty
+    scount = 0
+    c_text = NULL
+    c_node_cur = c_node = _textNodeOrSkip(c_node)
+    while c_node_cur is not NULL:
+        if c_node_cur.content[0] != c'\0':
+            c_text = c_node_cur.content
+        scount += 1
+        c_node_cur = _textNodeOrSkip(c_node_cur.next)
+
+    # handle two most common cases first
+    if c_text is NULL:
+        # only empty text nodes ('') or no text nodes at all (None)
+        return '' if scount > 0 else None
+    if scount == 1:
+        return funicode(c_text)
+
+    # the rest is not performance critical anymore
+    # concatenate the raw content of all text nodes before decoding
+    result = b''
+    while c_node is not NULL:
+        result += <unsigned char*>c_node.content
+        c_node = _textNodeOrSkip(c_node.next)
+    return funicode(<const_xmlChar*><unsigned char*>result)
+
+cdef void _removeText(xmlNode* c_node) noexcept:
+    """Unlink and free the run of text nodes that starts at 'c_node'.
+
+    Nodes are selected with _textNodeOrSkip(), so the run ends at the first
+    node that it does not accept or skip.
+    """
+    cdef xmlNode* c_text = _textNodeOrSkip(c_node)
+    cdef xmlNode* c_following
+    while c_text is not NULL:
+        # look up the successor before the current node gets freed
+        c_following = _textNodeOrSkip(c_text.next)
+        tree.xmlUnlinkNode(c_text)
+        tree.xmlFreeNode(c_text)
+        c_text = c_following
+
+cdef xmlNode* _createTextNode(xmlDoc* doc, value) except NULL:
+    """Create a new text node for 'value' in the document 'doc'.
+
+    A CDATA value becomes a CDATA block; anything else is converted with
+    _utf8() and becomes a plain text node.  Raises MemoryError if libxml2
+    returns no node.
+    """
+    cdef xmlNode* c_new_node
+    if not isinstance(value, CDATA):
+        utf8_text = _utf8(value)
+        c_new_node = tree.xmlNewDocText(doc, _xcstr(utf8_text))
+    else:
+        c_new_node = tree.xmlNewCDataBlock(
+            doc, _xcstr((<CDATA>value)._utf8_data),
+            python.PyBytes_GET_SIZE((<CDATA>value)._utf8_data))
+    if c_new_node is NULL:
+        raise MemoryError()
+    return c_new_node
+
+cdef int _setNodeText(xmlNode* c_node, value) except -1:
+    """Replace the leading text of 'c_node' by 'value'.
+
+    The text nodes at the start of the children are removed first.  With a
+    value of None, nothing gets added afterwards.
+    """
+    cdef xmlNode* c_text_node
+    _removeText(c_node.children)
+    if value is not None:
+        # the new text node becomes the first child
+        c_text_node = _createTextNode(c_node.doc, value)
+        if c_node.children is not NULL:
+            tree.xmlAddPrevSibling(c_node.children, c_text_node)
+        else:
+            tree.xmlAddChild(c_node, c_text_node)
+    return 0
+
+cdef int _setTailText(xmlNode* c_node, value) except -1:
+    """Replace the tail text of 'c_node' by 'value'.
+
+    The text nodes that follow 'c_node' are removed first.  With a value of
+    None, nothing gets added afterwards.
+    """
+    cdef xmlNode* c_text_node
+    # drop the existing tail, i.e. the text nodes following the node
+    _removeText(c_node.next)
+    if value is not None:
+        # the new text node becomes the next sibling
+        c_text_node = _createTextNode(c_node.doc, value)
+        tree.xmlAddNextSibling(c_node, c_text_node)
+    return 0
+
+cdef bytes _resolveQNameText(_Element element, value):
+    """Return the 'prefix:name' byte string for a qualified name, using a
+    namespace declaration in the scope of 'element'.
+
+    A name without namespace is returned as is.  If the namespace is only
+    available as a default namespace declaration (no prefix), the plain
+    name is returned as well.
+    """
+    cdef xmlNs* c_ns
+    ns, tag = _getNsTag(value)
+    if ns is None:
+        return tag
+    c_ns = element._doc._findOrBuildNodeNs(
+        element._c_node, _xcstr(ns), NULL, 0)
+    if c_ns.prefix is NULL:
+        # A default namespace declaration has no prefix.  Passing NULL for
+        # '%s' to PyBytes_FromFormat() would read through a NULL pointer.
+        # NOTE(review): assumes readers resolve unprefixed QName values
+        # against the default namespace -- confirm.
+        return tag
+    return python.PyBytes_FromFormat('%s:%s', c_ns.prefix, _cstr(tag))
+
+cdef inline bint _hasChild(xmlNode* c_node) noexcept:
+    """Tell whether 'c_node' has at least one child that _isElement()
+    accepts.  False for a NULL node.
+    """
+    return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL
+
+cdef inline Py_ssize_t _countElements(xmlNode* c_node) noexcept:
+    """Count the nodes accepted by _isElement() among 'c_node' and its
+    following siblings.  Returns 0 for a NULL node.
+    """
+    cdef Py_ssize_t n_elements = 0
+    cdef xmlNode* c_sibling = c_node
+    while c_sibling is not NULL:
+        if _isElement(c_sibling):
+            n_elements += 1
+        c_sibling = c_sibling.next
+    return n_elements
+
+cdef int _findChildSlice(
+    slice sliceobject, xmlNode* c_parent,
+    xmlNode** c_start_node, Py_ssize_t* c_step, Py_ssize_t* c_length) except -1:
+    """Resolve a slice over the child elements of 'c_parent'.
+
+    The first node of the slice, the step size and the slice length are
+    returned through the pointer arguments.
+    """
+    cdef Py_ssize_t start = 0, stop = 0
+    cdef Py_ssize_t childcount = _countElements(c_parent.children)
+    if childcount != 0:
+        python.PySlice_GetIndicesEx(
+            sliceobject, childcount, &start, &stop, c_step, c_length)
+        # search from whichever end is closer to the start index
+        if start > childcount // 2:
+            c_start_node[0] = _findChildBackwards(c_parent, childcount - start - 1)
+        else:
+            c_start_node[0] = _findChild(c_parent, start)
+        return 0
+
+    # no child elements => empty slice, only the step needs resolving
+    c_start_node[0] = NULL
+    c_length[0] = 0
+    if sliceobject.step is None:
+        c_step[0] = 1
+    else:
+        python._PyEval_SliceIndex(sliceobject.step, c_step)
+    return 0
+
+cdef bint _isFullSlice(slice sliceobject) except -1:
+    """Conservative guess if this slice is a full slice as in ``s[:]``.
+
+    True only if start and stop are both None and the step is None or
+    resolves to 1.  False for a None slice object.
+    """
+    cdef Py_ssize_t step = 0
+    if sliceobject is None:
+        return 0
+    if sliceobject.start is not None or sliceobject.stop is not None:
+        return 0
+    if sliceobject.step is None:
+        return 1
+    python._PyEval_SliceIndex(sliceobject.step, &step)
+    return step == 1
+
+cdef _collectChildren(_Element element):
+    """Return a list with the Element proxies of all child elements of
+    'element', in document order.
+    """
+    cdef list children = []
+    cdef xmlNode* c_child = element._c_node.children
+    # move on to the first element if the first child is not one
+    if c_child is not NULL and not _isElement(c_child):
+        c_child = _nextElement(c_child)
+    while c_child is not NULL:
+        children.append(_elementFactory(element._doc, c_child))
+        c_child = _nextElement(c_child)
+    return children
+
+cdef inline xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index) noexcept:
+    """Return the child element of 'c_node' at 'index', or NULL if there is
+    none.  A negative index counts from the last child.
+    """
+    if index >= 0:
+        return _findChildForwards(c_node, index)
+    return _findChildBackwards(c_node, -index - 1)
+    
+cdef inline xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index) noexcept:
+    """Return the ``index``-th (0-based) child of c_node that ``_isElement()``
+    accepts, counting from the first child.  Return NULL if there is none.
+    """
+    cdef Py_ssize_t seen = 0
+    cdef xmlNode* c_cur = c_node.children
+    while c_cur is not NULL:
+        if not _isElement(c_cur):
+            c_cur = c_cur.next
+            continue
+        if seen == index:
+            return c_cur
+        seen += 1
+        c_cur = c_cur.next
+    return NULL
+
+cdef inline xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index) noexcept:
+    """Return the ``index``-th (0-based) child of c_node that ``_isElement()``
+    accepts, counting from the last child towards the first.  Return NULL
+    if there is none.
+    """
+    cdef Py_ssize_t seen = 0
+    cdef xmlNode* c_cur = c_node.last
+    while c_cur is not NULL:
+        if not _isElement(c_cur):
+            c_cur = c_cur.prev
+            continue
+        if seen == index:
+            return c_cur
+        seen += 1
+        c_cur = c_cur.prev
+    return NULL
+    
+cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node) noexcept nogil:
+    """Starting at c_node, return the first text or CDATA node found while
+    stepping over XInclude start/end marker nodes.
+
+    Returns NULL if any other kind of node is hit first, or if the sibling
+    list ends.  Used when collecting runs of adjacent text nodes.
+    """
+    while c_node is not NULL:
+        if c_node.type == tree.XML_XINCLUDE_START or \
+                c_node.type == tree.XML_XINCLUDE_END:
+            # ignorable marker => keep looking behind it
+            c_node = c_node.next
+            continue
+        if c_node.type == tree.XML_TEXT_NODE or \
+                c_node.type == tree.XML_CDATA_SECTION_NODE:
+            return c_node
+        # anything else terminates the text run
+        return NULL
+    return NULL
+
+cdef inline xmlNode* _nextElement(xmlNode* c_node) noexcept:
+    """Return the closest following sibling of c_node that ``_isElement()``
+    accepts, or NULL if there is none (or if c_node is NULL).
+    """
+    if c_node is NULL:
+        return NULL
+    c_node = c_node.next
+    while c_node is not NULL and not _isElement(c_node):
+        c_node = c_node.next
+    return c_node
+
+cdef inline xmlNode* _previousElement(xmlNode* c_node) noexcept:
+    """Return the closest preceding sibling of c_node that ``_isElement()``
+    accepts, or NULL if there is none (or if c_node is NULL).
+    """
+    if c_node is NULL:
+        return NULL
+    c_node = c_node.prev
+    while c_node is not NULL and not _isElement(c_node):
+        c_node = c_node.prev
+    return c_node
+
+cdef inline xmlNode* _parentElement(xmlNode* c_node) noexcept:
+    """Return the parent of c_node, provided that both c_node and its parent
+    are accepted by ``_isElement()``.  Return NULL otherwise.
+    """
+    cdef xmlNode* c_parent
+    if c_node is NULL or not _isElement(c_node):
+        return NULL
+    c_parent = c_node.parent
+    if c_parent is not NULL and _isElement(c_parent):
+        return c_parent
+    return NULL
+
+cdef inline bint _tagMatches(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name) noexcept:
+    """Tests if the node matches namespace URI and tag name.
+
+    A node matches if it matches both c_href and c_name.
+
+    A node matches c_href if any of the following is true:
+    * c_href is NULL and c_name is NULL (everything matches)
+    * c_href is NULL, c_name is given and the node has no namespace
+    * its namespace is NULL and c_href is the empty string
+    * its namespace string equals the c_href string
+
+    A node matches c_name if any of the following is true:
+    * c_name is NULL
+    * its name string equals the c_name string
+
+    A NULL node never matches.  A non-element node only matches if both
+    c_href and c_name are NULL.
+    """
+    if c_node is NULL:
+        return 0
+    if c_node.type != tree.XML_ELEMENT_NODE:
+        # not an element, only succeed if we match everything
+        return c_name is NULL and c_href is NULL
+    if c_name is NULL:
+        # any tag name => only the namespace decides
+        if c_href is NULL:
+            # always match
+            return 1
+        else:
+            c_node_href = _getNs(c_node)
+            if c_node_href is NULL:
+                # node without namespace matches the empty namespace only
+                return c_href[0] == c'\0'
+            else:
+                return tree.xmlStrcmp(c_node_href, c_href) == 0
+    elif c_href is NULL:
+        # name given without namespace => node must not have a namespace
+        if _getNs(c_node) is not NULL:
+            return 0
+        # pointer identity is a cheap shortcut before comparing the strings
+        return c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0
+    elif c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0:
+        # name matches => now compare the namespace
+        c_node_href = _getNs(c_node)
+        if c_node_href is NULL:
+            return c_href[0] == c'\0'
+        else:
+            return tree.xmlStrcmp(c_node_href, c_href) == 0
+    else:
+        return 0
+
+cdef inline bint _tagMatchesExactly(xmlNode* c_node, qname* c_qname) noexcept:
+    """Tests if the node matches the namespace URI and tag name in c_qname.
+
+    Thin wrapper that extracts the namespace and name of the node and
+    passes them on to ``_nsTagMatchesExactly()``.
+
+    In contrast to _tagMatches(), the name is compared by address only,
+    so ``c_qname.c_name`` is expected to be the same pointer that the node
+    uses for its name (e.g. taken from the same dict).
+
+    A node matches the href of the qname if any of the following is true:
+    * the qname href is NULL
+    * the qname href is the empty string and the node namespace is NULL
+      or empty
+    * its namespace string equals the qname href string
+
+    A node matches the name of the qname if any of the following is true:
+    * the qname c_name is NULL
+    * its name string points to the same address (!) as the qname c_name
+    """
+    cdef const_xmlChar* c_node_href = _getNs(c_node)
+    return _nsTagMatchesExactly(c_node_href, c_node.name, c_qname)
+
+cdef inline bint _nsTagMatchesExactly(const_xmlChar* c_node_href,
+                                      const_xmlChar* c_node_name,
+                                      qname* c_qname) noexcept:
+    """Tests if name and namespace URI match those of c_qname.
+
+    In contrast to _tagMatches(), the name is compared by address only,
+    so ``c_qname.c_name`` is expected to be the same pointer that the node
+    uses for its name (e.g. taken from the same dict).
+
+    The name matches if any of the following is true:
+    * c_qname.c_name is NULL
+    * c_node_name points to the same address (!) as c_qname.c_name
+
+    The namespace matches if any of the following is true:
+    * c_qname.href is NULL
+    * c_qname.href is the empty string and c_node_href is NULL or empty
+    * c_node_href is not NULL and equals the c_qname.href string
+    """
+    cdef char* c_href
+    # name check first: pure pointer comparison
+    if c_qname.c_name is not NULL and c_qname.c_name is not c_node_name:
+        return 0
+    if c_qname.href is NULL:
+        # no namespace restriction
+        return 1
+    c_href = python.__cstr(c_qname.href)
+    if c_href[0] == b'\0':
+        # empty href => only nodes without a namespace match
+        return c_node_href is NULL or c_node_href[0] == b'\0'
+    if c_node_href is NULL:
+        return 0
+    return tree.xmlStrcmp(<const_xmlChar*>c_href, c_node_href) == 0
+
+cdef Py_ssize_t _mapTagsToQnameMatchArray(xmlDoc* c_doc, list ns_tags,
+                                          qname* c_ns_tags, bint force_into_dict) except -1:
+    """Fill the qname array ``c_ns_tags`` from the ``(ns, tag)`` pairs in
+    ``ns_tags`` for efficient matching with _tagMatchesExactly().
+
+    Tag names are resolved against the dict of ``c_doc``.  With
+    ``force_into_dict``, names are looked up with ``xmlDictLookup()`` and a
+    NULL result raises MemoryError.  Otherwise, names that
+    ``xmlDictExists()`` does not find are skipped.
+
+    Returns the number of array entries that were filled.
+
+    Note that each qname struct in the array owns a reference to its href
+    byte string object if it is not NULL.
+    """
+    cdef Py_ssize_t filled = 0, idx
+    cdef bytes ns, tag
+    for ns, tag in ns_tags:
+        if tag is None:
+            c_tag = <const_xmlChar*>NULL
+        elif not force_into_dict:
+            c_tag = tree.xmlDictExists(c_doc.dict, _xcstr(tag), len(tag))
+            if c_tag is NULL:
+                # not in the dict => not in the document
+                continue
+        else:
+            c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), len(tag))
+            if c_tag is NULL:
+                # release the references taken so far before raising
+                for idx in xrange(filled):
+                    cpython.ref.Py_XDECREF(c_ns_tags[idx].href)
+                raise MemoryError()
+        c_ns_tags[filled].c_name = c_tag
+        if ns is not None:
+            cpython.ref.Py_INCREF(ns) # keep an owned reference!
+            c_ns_tags[filled].href = <python.PyObject*>ns
+        else:
+            c_ns_tags[filled].href = NULL
+        filled += 1
+    return filled
+
+cdef int _removeNode(_Document doc, xmlNode* c_node) except -1:
+    """Unlink a node from its tree, taking its tail text along, and try to
+    free it together with its subtree.
+
+    If ``attemptDeallocation()`` does not free the node, it is passed
+    through ``moveNodeToDocument()`` to make it self-contained.
+    """
+    cdef xmlNode* c_tail = c_node.next
+    tree.xmlUnlinkNode(c_node)
+    _moveTail(c_tail, c_node)
+    if attemptDeallocation(c_node):
+        return 0
+    # node stays alive => make namespaces absolute
+    moveNodeToDocument(doc, c_node.doc, c_node)
+    return 0
+
+cdef int _removeSiblings(xmlNode* c_element, tree.xmlElementType node_type, bint with_tail) except -1:
+    """Unlink the siblings of ``c_element`` that have the node type
+    ``node_type`` and try to deallocate them.
+
+    Siblings are visited via _nextElement() / _previousElement(), first
+    those following ``c_element``, then those preceding it.
+
+    If ``with_tail`` is true, ``_removeText()`` is called on the node that
+    follows each removed sibling (presumably dropping its tail text -
+    confirm against _removeText()).
+    """
+    cdef xmlNode* c_node
+    cdef xmlNode* c_next
+    c_node = c_element.next
+    while c_node is not NULL:
+        # look up the successor before the current node gets unlinked
+        c_next = _nextElement(c_node)
+        if c_node.type == node_type:
+            if with_tail:
+                _removeText(c_node.next)
+            tree.xmlUnlinkNode(c_node)
+            attemptDeallocation(c_node)
+        c_node = c_next
+    c_node = c_element.prev
+    while c_node is not NULL:
+        # same as above, but walking towards the first sibling
+        c_next = _previousElement(c_node)
+        if c_node.type == node_type:
+            if with_tail:
+                _removeText(c_node.next)
+            tree.xmlUnlinkNode(c_node)
+            attemptDeallocation(c_node)
+        c_node = c_next
+    return 0
+
+cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target) noexcept:
+    """Tail support: move the run of text nodes that starts at ``c_tail``
+    so that it directly follows ``c_target``, keeping the node order.
+
+    The run is determined by _textNodeOrSkip().
+    """
+    cdef xmlNode* c_following
+    c_tail = _textNodeOrSkip(c_tail)
+    while c_tail is not NULL:
+        # find the continuation of the run before relinking the current node
+        c_following = _textNodeOrSkip(c_tail.next)
+        c_target = tree.xmlAddNextSibling(c_target, c_tail)
+        c_tail = c_following
+
+cdef int _copyTail(xmlNode* c_tail, xmlNode* c_target) except -1:
+    """Tail copying support: copy the run of text nodes that starts at
+    ``c_tail`` and add the copies behind ``c_target``.
+
+    The run is determined by _textNodeOrSkip().  Raises MemoryError if a
+    node cannot be copied.
+    """
+    cdef xmlNode* c_copy
+    c_tail = _textNodeOrSkip(c_tail)
+    while c_tail is not NULL:
+        if c_target.doc is c_tail.doc:
+            c_copy = tree.xmlCopyNode(c_tail, 0)
+        else:
+            # copying across documents
+            c_copy = tree.xmlDocCopyNode(c_tail, c_target.doc, 0)
+        if c_copy is NULL:
+            raise MemoryError()
+        c_target = tree.xmlAddNextSibling(c_target, c_copy)
+        c_tail = _textNodeOrSkip(c_tail.next)
+    return 0
+
+cdef int _copyNonElementSiblings(xmlNode* c_node, xmlNode* c_target) except -1:
+    """Copy the PIs, comments and DTD nodes that directly surround
+    ``c_node`` as siblings and add the copies around ``c_target``, keeping
+    their original order on both sides.
+
+    A copied DTD is additionally registered as the internal or external
+    subset of the target document.  Trailing siblings are only copied
+    if they are PIs or comments.
+
+    Raises MemoryError if a node cannot be copied.
+    """
+    cdef xmlNode* c_copy
+    cdef xmlNode* c_last_copy
+    cdef xmlNode* c_sibling = c_node
+    # rewind to the first of the directly preceding PI/comment/DTD siblings
+    while c_sibling.prev != NULL and \
+            (c_sibling.prev.type == tree.XML_PI_NODE or
+             c_sibling.prev.type == tree.XML_COMMENT_NODE or
+             c_sibling.prev.type == tree.XML_DTD_NODE):
+        c_sibling = c_sibling.prev
+    # copy the preceding siblings; inserting each copy right before the
+    # target keeps them in their original order
+    while c_sibling != c_node:
+        if c_sibling.type == tree.XML_DTD_NODE:
+            c_copy = <xmlNode*>_copyDtd(<tree.xmlDtd*>c_sibling)
+            if c_sibling == <xmlNode*>c_node.doc.intSubset:
+                c_target.doc.intSubset = <tree.xmlDtd*>c_copy
+            else: # c_sibling == c_node.doc.extSubset
+                c_target.doc.extSubset = <tree.xmlDtd*>c_copy
+        else:
+            c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
+            if c_copy is NULL:
+                raise MemoryError()
+        tree.xmlAddPrevSibling(c_target, c_copy)
+        c_sibling = c_sibling.next
+    # copy the trailing siblings; each copy must go behind the previously
+    # inserted one - always inserting right behind the target would
+    # reverse their order
+    c_last_copy = c_target
+    while c_sibling.next != NULL and \
+            (c_sibling.next.type == tree.XML_PI_NODE or
+             c_sibling.next.type == tree.XML_COMMENT_NODE):
+        c_sibling = c_sibling.next
+        c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1)
+        if c_copy is NULL:
+            raise MemoryError()
+        tree.xmlAddNextSibling(c_last_copy, c_copy)
+        c_last_copy = c_copy
+    return 0
+
+cdef int _deleteSlice(_Document doc, xmlNode* c_node,
+                      Py_ssize_t count, Py_ssize_t step) except -1:
+    """Delete a slice of up to ``count`` nodes, starting with ``c_node`` and
+    advancing by ``step`` siblings after each removal.
+
+    A positive ``step`` walks via _nextElement(), any other value walks
+    via _previousElement() using the negated step width.  Does nothing
+    if ``c_node`` is NULL.
+    """
+    cdef xmlNode* c_following
+    cdef Py_ssize_t removed = 0, i
+    cdef _node_to_node_function advance
+    if c_node is NULL:
+        return 0
+    if step > 0:
+        advance = _nextElement
+    else:
+        step = -step
+        advance = _previousElement
+
+    c_following = c_node
+    while c_node is not NULL and removed < count:
+        # locate the next slice item before the current one is removed
+        for i in range(step):
+            c_following = advance(c_following)
+            if c_following is NULL:
+                break
+        _removeNode(doc, c_node)
+        removed += 1
+        c_node = c_following
+    return 0
+
+cdef int _replaceSlice(_Element parent, xmlNode* c_node,
+                       Py_ssize_t slicelength, Py_ssize_t step,
+                       bint left_to_right, elements) except -1:
+    """Replace the slice of ``slicelength`` elements starting at ``c_node``
+    with positive step width ``step`` by the Elements in ``elements``.  The
+    direction is given by the boolean argument ``left_to_right``.
+
+    ``c_node`` may be NULL to indicate the end of the children list.
+
+    ``elements`` may be any iterable; anything but a list or tuple is
+    copied into a list first.
+
+    Raises ValueError if ``step`` is not 1 or the direction is right-to-left
+    and the number of elements differs from ``slicelength``.
+    """
+    cdef xmlNode* c_orig_neighbour
+    cdef xmlNode* c_next
+    cdef xmlDoc*  c_source_doc
+    cdef _Element element
+    cdef Py_ssize_t seqlength, i, c
+    cdef _node_to_node_function next_element
+    assert step > 0
+    if left_to_right:
+        next_element = _nextElement
+    else:
+        next_element = _previousElement
+
+    # we need a sequence with a known length that can be iterated safely
+    if not isinstance(elements, (list, tuple)):
+        elements = list(elements)
+
+    if step != 1 or not left_to_right:
+        # *replacing* children stepwise with list => check size!
+        seqlength = len(elements)
+        if seqlength != slicelength:
+            raise ValueError, f"attempt to assign sequence of size {seqlength} " \
+                f"to extended slice of size {slicelength}"
+
+    if c_node is NULL:
+        # no children yet => add all elements straight away
+        if left_to_right:
+            for element in elements:
+                assert element is not None, "Node must not be None"
+                _appendChild(parent, element)
+        else:
+            for element in elements:
+                assert element is not None, "Node must not be None"
+                _prependChild(parent, element)
+        return 0
+
+    # remove the elements first as some might be re-added
+    if left_to_right:
+        # L->R, remember left neighbour
+        c_orig_neighbour = _previousElement(c_node)
+    else:
+        # R->L, remember right neighbour
+        c_orig_neighbour = _nextElement(c_node)
+
+    # We remove the original slice elements one by one. Since we hold
+    # a Python reference to all elements that we will insert, it is
+    # safe to let _removeNode() try (and fail) to free them even if
+    # the element itself or one of its descendents will be reinserted.
+    c = 0
+    c_next = c_node
+    while c_node is not NULL and c < slicelength:
+        # find the next slice item before removing the current one
+        for i in range(step):
+            c_next = next_element(c_next)
+            if c_next is NULL:
+                break
+        _removeNode(parent._doc, c_node)
+        c += 1
+        c_node = c_next
+
+    # make sure each element is inserted only once
+    elements = iter(elements)
+
+    # find the first node right of the new insertion point
+    if left_to_right:
+        if c_orig_neighbour is not NULL:
+            c_node = next_element(c_orig_neighbour)
+        else:
+            # before the first element
+            c_node = _findChildForwards(parent._c_node, 0)
+    elif c_orig_neighbour is NULL:
+        # at the end, but reversed stepping
+        # append one element and go to the next insertion point
+        for element in elements:
+            assert element is not None, "Node must not be None"
+            _appendChild(parent, element)
+            c_node = element._c_node
+            if slicelength > 0:
+                slicelength -= 1
+                for i in range(1, step):
+                    c_node = next_element(c_node)
+                    if c_node is NULL:
+                        break
+            # only the first element is handled here, the shared iterator
+            # keeps the rest for the loops below
+            break
+    else:
+        c_node = c_orig_neighbour
+
+    if left_to_right:
+        # adjust step size after removing slice as we are not stepping
+        # over the newly inserted elements
+        step -= 1
+
+    # now insert elements where we removed them
+    if c_node is not NULL:
+        for element in elements:
+            assert element is not None, "Node must not be None"
+            _assertValidNode(element)
+            # move element and tail over
+            c_source_doc = element._c_node.doc
+            c_next = element._c_node.next
+            tree.xmlAddPrevSibling(c_node, element._c_node)
+            _moveTail(c_next, element._c_node)
+
+            # integrate element into new document
+            moveNodeToDocument(parent._doc, c_source_doc, element._c_node)
+
+            # stop at the end of the slice
+            if slicelength > 0:
+                slicelength -= 1
+                for i in range(step):
+                    c_node = next_element(c_node)
+                    if c_node is NULL:
+                        break
+                if c_node is NULL:
+                    break
+        else:
+            # everything inserted
+            return 0
+
+    # append the remaining elements at the respective end
+    if left_to_right:
+        for element in elements:
+            assert element is not None, "Node must not be None"
+            _assertValidNode(element)
+            _appendChild(parent, element)
+    else:
+        for element in elements:
+            assert element is not None, "Node must not be None"
+            _assertValidNode(element)
+            _prependChild(parent, element)
+
+    return 0
+
+
+cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1:
+    """Adaptation of 'xmlAddChild()' that links ``c_node`` in as the last
+    child of ``c_parent`` and then fixes the document links of the subtree
+    through ``_setTreeDoc()``.
+
+    Only the ``parent`` and ``prev`` links of ``c_node`` are set here.
+    """
+    assert _isElement(c_node)
+    c_node.parent = c_parent
+    if c_parent.children is not NULL:
+        # append behind the current last child
+        c_node.prev = c_parent.last
+        c_parent.last.next = c_node
+        c_parent.last = c_node
+    else:
+        # first and only child
+        c_parent.last = c_node
+        c_parent.children = c_node
+
+    _setTreeDoc(c_node, c_parent.doc)
+    return 0
+
+
+cdef int _appendChild(_Element parent, _Element child) except -1:
+    """Move ``child`` (with its tail text) to the end of the children of
+    ``parent``.
+
+    Raises ValueError if ``child`` is ``parent`` itself or one of its
+    ancestors.
+    """
+    cdef xmlNode* c_child = child._c_node
+    cdef xmlDoc* c_old_doc = c_child.doc
+    cdef xmlNode* c_tail
+    # prevent cycles
+    if _isAncestorOrSame(c_child, parent._c_node):
+        raise ValueError("cannot append parent to itself")
+    # remember where a possible tail text starts before unlinking
+    c_tail = c_child.next
+    tree.xmlUnlinkNode(c_child)
+    # do not call xmlAddChild() here since it would deep-traverse the tree
+    _linkChild(parent._c_node, c_child)
+    _moveTail(c_tail, c_child)
+    # the subtree may still refer to the document it came from => fix that
+    moveNodeToDocument(parent._doc, c_old_doc, c_child)
+    return 0
+
+cdef int _prependChild(_Element parent, _Element child) except -1:
+    """Move ``child`` (with its tail text) in front of the first child of
+    ``parent`` that _findChildForwards() finds, or link it in as a child
+    if there is none.
+
+    Raises ValueError if ``child`` is ``parent`` itself or one of its
+    ancestors.
+    """
+    cdef xmlNode* c_child = child._c_node
+    cdef xmlDoc* c_old_doc = c_child.doc
+    cdef xmlNode* c_tail
+    cdef xmlNode* c_first
+    # prevent cycles
+    if _isAncestorOrSame(c_child, parent._c_node):
+        raise ValueError("cannot append parent to itself")
+    # remember where a possible tail text starts before moving the node
+    c_tail = c_child.next
+    c_first = _findChildForwards(parent._c_node, 0)
+    if c_first is not NULL:
+        tree.xmlAddPrevSibling(c_first, c_child)
+    else:
+        tree.xmlUnlinkNode(c_child)
+        # do not call xmlAddChild() here since it would deep-traverse the tree
+        _linkChild(parent._c_node, c_child)
+    _moveTail(c_tail, c_child)
+    # the subtree may still refer to the document it came from => fix that
+    moveNodeToDocument(parent._doc, c_old_doc, c_child)
+    return 0
+
+cdef int _appendSibling(_Element element, _Element sibling) except -1:
+    """Insert ``sibling`` as a following sibling of ``element``.
+
+    Delegates to ``_addSibling()``.
+    """
+    return _addSibling(element, sibling, True)
+
+cdef int _prependSibling(_Element element, _Element sibling) except -1:
+    """Insert ``sibling`` directly before ``element``.
+
+    Delegates to ``_addSibling()``.
+    """
+    return _addSibling(element, sibling, False)
+
+cdef int _addSibling(_Element element, _Element sibling, bint as_next) except -1:
+    """Move ``sibling`` (with its tail text) next to ``element``.
+
+    With ``as_next``, the node is inserted in front of the next sibling
+    found by _nextElement(), or behind the very last sibling node if there
+    is none, so that it ends up behind any tail text of ``element``.
+    Otherwise, it is inserted directly before ``element``.
+
+    Does nothing if both arguments refer to the same node.  Raises
+    ValueError if ``sibling`` is an ancestor of ``element``.
+    """
+    cdef xmlNode* c_new = sibling._c_node
+    cdef xmlDoc* c_old_doc = c_new.doc
+    cdef xmlNode* c_tail
+    cdef xmlNode* c_anchor
+    # prevent cycles
+    if _isAncestorOrSame(c_new, element._c_node):
+        if element._c_node is c_new:
+            return 0  # nothing to do
+        raise ValueError("cannot add ancestor as sibling, please break cycle first")
+    # remember where a possible tail text starts before moving the node
+    c_tail = c_new.next
+    if not as_next:
+        tree.xmlAddPrevSibling(element._c_node, c_new)
+    else:
+        # must insert after any tail text
+        c_anchor = _nextElement(element._c_node)
+        if c_anchor is not NULL:
+            tree.xmlAddPrevSibling(c_anchor, c_new)
+        else:
+            # no further element => go behind the last sibling node
+            c_anchor = element._c_node
+            while c_anchor.next is not NULL:
+                c_anchor = c_anchor.next
+            tree.xmlAddNextSibling(c_anchor, c_new)
+    _moveTail(c_tail, c_new)
+    # the subtree may still refer to the document it came from => fix that
+    moveNodeToDocument(element._doc, c_old_doc, c_new)
+    return 0
+
+cdef inline bint isutf8(const_xmlChar* s) noexcept:
+    """Return True if the NUL terminated string contains at least one byte
+    with the high bit set, i.e. a non-ASCII byte.  Return False otherwise.
+    """
+    while s[0] != c'\0':
+        if s[0] & 0x80:
+            return True
+        s += 1
+    return False
+
+cdef bint isutf8l(const_xmlChar* s, size_t length) noexcept:
+    """
+    Search for non-ASCII characters in the string, knowing its length in advance.
+
+    Returns True if any of the ``length`` bytes starting at ``s`` has its
+    high bit set, False otherwise.
+
+    Bytes are read one at a time up to the first long-aligned address,
+    then one ``unsigned long`` at a time, then byte-wise again for the
+    rest.  No byte at or behind ``s + length`` is ever read.
+    """
+    cdef unsigned int i
+    cdef unsigned long non_ascii_mask
+    cdef const unsigned long *lptr
+    cdef const_xmlChar* c_end = s + length
+
+    if length >= sizeof(non_ascii_mask):
+        # Build constant 0x80808080... mask (and let the C compiler fold it).
+        non_ascii_mask = 0
+        for i in range(sizeof(non_ascii_mask) // 2):
+            non_ascii_mask = (non_ascii_mask << 16) | 0x8080
+
+        # Advance to long-aligned character before we start reading longs.
+        while (<size_t>s) % sizeof(unsigned long) and s < c_end:
+            if s[0] & 0x80:
+                return True
+            s += 1
+
+        # Read one long at a time, but only while the complete long lies
+        # inside the buffer.  The aligned read position is not a multiple
+        # of sizeof(long) away from the start for an unaligned input, so
+        # the remaining byte count must be checked to avoid reading (and
+        # misinterpreting) bytes behind the end of the string.
+        lptr = <const unsigned long*> s
+        while <size_t>(c_end - <const_xmlChar *>lptr) >= sizeof(unsigned long):
+            if lptr[0] & non_ascii_mask:
+                return True
+            lptr += 1
+        s = <const_xmlChar *>lptr
+
+    # Check the remaining bytes (or all bytes of a short string) one by one.
+    while s < c_end:
+        if s[0] & 0x80:
+            return True
+        s += 1
+
+    return False
+
+cdef int _is_valid_xml_ascii(bytes pystring) except -1:
+    """Return 1 if every byte of the string is accepted by
+    ``xmlIsChar_ch()``, 0 otherwise.
+    """
+    # Reading the bytes as *signed* chars turns non-ASCII bytes into
+    # negative integers, which xmlIsChar_ch() does not accept.
+    cdef signed char ch
+    for ch in pystring:
+        if tree.xmlIsChar_ch(ch):
+            continue
+        return 0
+    return 1
+
+cdef bint _is_valid_xml_utf8(bytes pystring) except -1:
+    """Check if a string is like valid UTF-8 XML content.
+
+    Returns 0 if the byte string contains an ASCII byte that
+    ``xmlIsChar_ch()`` rejects, or a three byte sequence that encodes
+    U+FFFE, U+FFFF or a UTF-16 surrogate code point.  Returns 1 otherwise.
+
+    This does not check that the input is well-formed UTF-8.
+    """
+    cdef const_xmlChar* s = _xcstr(pystring)
+    cdef const_xmlChar* c_end = s + len(pystring)
+    # sliding window over the current byte and the two bytes that follow it
+    cdef unsigned long next3 = 0
+    if s < c_end - 2:
+        # preload the first two bytes, the loop shifts in the third one
+        next3 = (s[0] << 8) | (s[1])
+
+    # main loop: all positions that have at least two more bytes behind them
+    while s < c_end - 2:
+        next3 = 0x00ffffff & ((next3 << 8) | s[2])
+        if s[0] & 0x80:
+            # 0xefbfbe and 0xefbfbf are utf-8 encodings of
+            # forbidden characters \ufffe and \uffff
+            if next3 == 0x00efbfbe or next3 == 0x00efbfbf:
+                return 0
+            # 0xeda080 and 0xedbfbf are utf-8 encodings of
+            # \ud800 and \udfff. Anything between them (inclusive)
+            # is forbidden, because they are surrogate blocks in utf-16.
+            if 0x00eda080 <= next3 <= 0x00edbfbf:
+                return 0
+        elif not tree.xmlIsChar_ch(s[0]):
+            return 0  # invalid ascii char
+        s += 1
+
+    # the last (up to two) bytes cannot start a three byte sequence,
+    # so only the ASCII check applies to them
+    while s < c_end:
+        if not s[0] & 0x80 and not tree.xmlIsChar_ch(s[0]):
+            return 0  # invalid ascii char
+        s += 1
+
+    return 1
+
+cdef inline unicode funicodeOrNone(const_xmlChar* s):
+    """Decode the C string with funicode(), or return None if it is NULL."""
+    if s is NULL:
+        return None
+    return funicode(s)
+
+cdef inline unicode funicodeOrEmpty(const_xmlChar* s):
+    """Decode the C string with funicode(), or return '' if it is NULL."""
+    if s is NULL:
+        return ''
+    return funicode(s)
+
+cdef unicode funicode(const_xmlChar* s):
+    """Decode a C string from UTF-8 into a unicode string."""
+    cdef unicode text = s.decode('UTF-8')
+    return text
+
+cdef bytes _utf8(object s):
+    """Validate user provided string input and return it UTF-8 encoded.
+
+    Unicode strings are encoded and checked with _is_valid_xml_utf8().
+    Bytes and bytearray input is checked with _is_valid_xml_ascii();
+    anything that is not exactly of type bytes is converted to bytes.
+
+    Raises TypeError for any other input type and ValueError if the
+    validation fails.
+    """
+    cdef int is_valid
+    cdef bytes encoded
+    if isinstance(s, unicode):
+        encoded = (<unicode>s).encode('utf8')
+        is_valid = _is_valid_xml_utf8(encoded)
+    elif isinstance(s, (bytes, bytearray)):
+        if type(s) is bytes:
+            encoded = s
+        else:
+            encoded = bytes(s)
+        is_valid = _is_valid_xml_ascii(encoded)
+    else:
+        raise TypeError("Argument must be bytes or unicode, got '%.200s'" % type(s).__name__)
+    if is_valid:
+        return encoded
+    raise ValueError(
+        "All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters")
+
+
+cdef bytes _utf8orNone(object s):
+    """Like _utf8(), but pass None through unchanged."""
+    if s is None:
+        return None
+    return _utf8(s)
+
+
+# Path classification codes as returned by _isFilePath().
+cdef enum:
+    # looks like a URL with a "scheme://" prefix
+    NO_FILE_PATH = 0
+    # starts with a slash
+    ABS_UNIX_FILE_PATH = 1
+    # starts with a drive letter and colon, as in "C:" or "C:\..."
+    ABS_WIN_FILE_PATH = 2
+    # anything else
+    REL_FILE_PATH = 3
+
+
+cdef bint _isFilePath(const_xmlChar* c_path) noexcept:
+    """Simple heuristic to see if a path is a filename.
+
+    Returns one of the path classification codes: ABS_UNIX_FILE_PATH,
+    ABS_WIN_FILE_PATH, REL_FILE_PATH, or NO_FILE_PATH (0) for a path that
+    looks like a "scheme://" URL.
+
+    NOTE(review): the declared return type is bint although enum codes
+    are returned; the callers visible in this file only test truthiness.
+    """
+    cdef xmlChar c
+    # test if it looks like an absolute Unix path or a Windows network path
+    if c_path[0] == c'/':
+        return ABS_UNIX_FILE_PATH
+
+    # test if it looks like an absolute Windows path or URL
+    if c'a' <= c_path[0] <= c'z' or c'A' <= c_path[0] <= c'Z':
+        c_path += 1
+        # c_path[1] is only read if c_path[0] is ':', so the check
+        # never reads behind the terminating NUL byte
+        if c_path[0] == c':' and c_path[1] in b'\0\\':
+            return ABS_WIN_FILE_PATH  # C: or C:\...
+
+        # test if it looks like a URL with scheme://
+        # (only ASCII letters are accepted in the scheme name here)
+        while c'a' <= c_path[0] <= c'z' or c'A' <= c_path[0] <= c'Z':
+            c_path += 1
+        if c_path[0] == c':' and c_path[1] == c'/' and c_path[2] == c'/':
+            return NO_FILE_PATH
+
+    # assume it's a relative path
+    return REL_FILE_PATH
+
+
+cdef object _getFSPathOrObject(object obj):
+    """
+    Convert a path-like object to its file system path.
+
+    Strings are returned unchanged.  Other objects are passed through
+    ``PyOS_FSPath()``; if that raises a TypeError, the original object
+    is returned.
+    """
+    if not _isString(obj):
+        try:
+            return python.PyOS_FSPath(obj)
+        except TypeError:
+            pass
+    return obj
+
+
+cdef object _encodeFilename(object filename):
+    """Make sure a filename is 8-bit encoded (or None).
+
+    None and bytes are returned unchanged.  Unicode strings that
+    _isFilePath() recognises as file paths are encoded with the
+    filename encoding if possible; everything else, and any name that
+    fails that encoding, is returned UTF-8 encoded.
+
+    Raises TypeError for other argument types.
+    """
+    if filename is None:
+        return None
+    if isinstance(filename, bytes):
+        return filename
+    if not isinstance(filename, unicode):
+        raise TypeError("Argument must be string or unicode.")
+
+    utf8_name = (<unicode>filename).encode('utf8')
+    if _isFilePath(<unsigned char*>utf8_name):
+        try:
+            return python.PyUnicode_AsEncodedString(
+                filename, _C_FILENAME_ENCODING, NULL)
+        except UnicodeEncodeError:
+            # fall back to UTF-8 below
+            pass
+    return utf8_name
+
+cdef object _decodeFilename(const_xmlChar* c_path):
+    """Decode a NUL terminated filename into a unicode string.
+
+    Determines the length with ``xmlStrlen()`` and delegates to
+    ``_decodeFilenameWithLength()``.
+    """
+    cdef size_t c_len = tree.xmlStrlen(c_path)
+    return _decodeFilenameWithLength(c_path, c_len)
+
+cdef object _decodeFilenameWithLength(const_xmlChar* c_path, size_t c_len):
+    """Decode the first ``c_len`` bytes of a filename into a unicode string.
+
+    What _isFilePath() recognises as a file path is decoded with the
+    filename encoding first.  If that does not apply or fails, UTF-8 is
+    tried, and finally latin-1 with replacement characters.
+    """
+    cdef bytes raw
+    if _isFilePath(c_path):
+        try:
+            return python.PyUnicode_Decode(
+                <const_char*>c_path, c_len, _C_FILENAME_ENCODING, NULL)
+        except UnicodeDecodeError:
+            pass
+    raw = (<unsigned char*>c_path)[:c_len]
+    try:
+        return raw.decode('UTF-8')
+    except UnicodeDecodeError:
+        # this is a stupid fallback, but it might still work...
+        return raw.decode('latin-1', 'replace')
+
+cdef object _encodeFilenameUTF8(object filename):
+    """Recode filename as UTF-8. Tries ASCII, local filesystem encoding and
+    UTF-8 as source encoding.
+
+    None is passed through.  Bytes without any non-ASCII byte are returned
+    unchanged.  Other bytes are decoded with the filename encoding and
+    then encoded as UTF-8; if that decoding fails but the bytes decode as
+    UTF-8, they are returned unchanged, otherwise the original decoding
+    error is raised.  Unicode strings are encoded as UTF-8.
+
+    Raises TypeError for other argument types.
+    """
+    cdef char* c_filename
+    if filename is None:
+        return None
+    if isinstance(filename, bytes):
+        if not isutf8l(<bytes>filename, len(<bytes>filename)):
+            # plain ASCII!
+            return filename
+        c_filename = _cstr(<bytes>filename)
+        try:
+            # try to decode with default encoding
+            filename = python.PyUnicode_Decode(
+                c_filename, len(<bytes>filename),
+                _C_FILENAME_ENCODING, NULL)
+        except UnicodeDecodeError as decode_exc:
+            try:
+                # try if it's proper UTF-8
+                (<bytes>filename).decode('utf8')
+                return filename
+            except UnicodeDecodeError:
+                raise decode_exc # otherwise re-raise original exception
+    if not isinstance(filename, unicode):
+        raise TypeError("Argument must be string or unicode.")
+    return (<unicode>filename).encode('utf8')
+
+cdef tuple _getNsTag(tag):
+    """Split a tag into a ``(namespace URI, tag name)`` tuple.
+
+    The namespace URI is None if the tag has no namespace part or an
+    empty one ('{}').
+    """
+    return __getNsTag(tag, False)
+
+cdef tuple _getNsTagWithEmptyNs(tag):
+    """Split a tag into a ``(namespace URI, tag name)`` tuple.
+
+    The namespace URI is None if the tag has no namespace part, and the
+    empty byte string if the namespace part is '{}'.
+    """
+    return __getNsTag(tag, True)
+
+cdef tuple __getNsTag(tag, bint empty_ns):
+    """Split a tag in '{namespace}name' notation into a tuple of UTF-8
+    encoded ``(namespace, name)`` byte strings.
+
+    The namespace is None if the tag does not start with '{'.  For an
+    empty namespace part ('{}'), it is ``b''`` if ``empty_ns`` is true and
+    None otherwise.  QName objects are unpacked to their text first.
+
+    Raises ValueError for an empty tag name or a missing '}'.
+    """
+    cdef char* c_tag
+    cdef char* c_ns_end
+    cdef Py_ssize_t taglen
+    cdef Py_ssize_t nslen
+    cdef bytes ns = None
+    # _isString() is much faster than isinstance()
+    if not _isString(tag) and isinstance(tag, QName):
+        tag = (<QName>tag).text
+    tag = _utf8(tag)
+    c_tag = _cstr(tag)
+    if c_tag[0] != c'{':
+        # plain tag name without namespace part
+        if python.PyBytes_GET_SIZE(tag) == 0:
+            raise ValueError, "Empty tag name"
+        return ns, tag
+
+    # skip the opening brace and look for the closing one
+    c_tag += 1
+    c_ns_end = cstring_h.strchr(c_tag, c'}')
+    if c_ns_end is NULL:
+        raise ValueError, "Invalid tag name"
+    nslen  = c_ns_end - c_tag
+    # total size minus the namespace and the two braces
+    taglen = python.PyBytes_GET_SIZE(tag) - nslen - 2
+    if taglen == 0:
+        raise ValueError, "Empty tag name"
+    if nslen > 0:
+        ns = <bytes>c_tag[:nslen]
+    elif empty_ns:
+        ns = b''
+    tag = <bytes>c_ns_end[1:taglen+1]
+    return ns, tag
+
+cdef inline int _pyXmlNameIsValid(name_utf8):
+    """Check that the UTF-8 encoded name passes _xmlNameIsValid() and
+    does not contain a colon.
+    """
+    if not _xmlNameIsValid(_xcstr(name_utf8)):
+        return 0
+    return b':' not in name_utf8
+
+cdef inline int _pyHtmlNameIsValid(name_utf8):
+    """Check the UTF-8 encoded name with _htmlNameIsValid()."""
+    cdef const_xmlChar* c_name = _xcstr(name_utf8)
+    return _htmlNameIsValid(c_name)
+
+cdef inline int _xmlNameIsValid(const_xmlChar* c_name) noexcept:
+    """Return the result of ``xmlValidateNameValue()`` for the name."""
+    cdef int is_valid = tree.xmlValidateNameValue(c_name)
+    return is_valid
+
+cdef int _htmlNameIsValid(const_xmlChar* c_name) noexcept:
+    """Return 1 if the name is non-empty and free of the characters
+    ``& < > / " '`` and of ASCII whitespace (tab, newline, vertical tab,
+    form feed, carriage return, space).  Return 0 otherwise, also for NULL.
+    """
+    cdef xmlChar ch
+    if c_name is NULL:
+        return 0
+    ch = c_name[0]
+    if ch == c'\0':
+        return 0
+    while ch != c'\0':
+        if ch in b'&<>/"\'\t\n\x0B\x0C\r ':
+            return 0
+        c_name += 1
+        ch = c_name[0]
+    return 1
+
+cdef bint _characterReferenceIsValid(const_xmlChar* c_name) noexcept:
+    """Check the digits of a character reference.
+
+    Accepts a non-empty sequence of decimal digits, or an 'x' followed by
+    a non-empty sequence of hex digits (either case).
+    """
+    cdef bint is_hex = c_name[0] == c'x'
+    if is_hex:
+        c_name += 1
+    if c_name[0] == c'\0':
+        # no digits at all
+        return 0
+    while c_name[0] != c'\0':
+        if c'0' <= c_name[0] <= c'9':
+            pass
+        elif not is_hex:
+            return 0
+        elif not (c'a' <= c_name[0] <= c'f' or c'A' <= c_name[0] <= c'F'):
+            return 0
+        c_name += 1
+    return 1
+
+cdef int _tagValidOrRaise(tag_utf) except -1:
+    """Raise ValueError unless the UTF-8 encoded tag name passes
+    _pyXmlNameIsValid().
+    """
+    if _pyXmlNameIsValid(tag_utf):
+        return 0
+    raise ValueError(f"Invalid tag name {(<bytes>tag_utf).decode('utf8')!r}")
+
+cdef int _htmlTagValidOrRaise(tag_utf) except -1:
+    """Raise ValueError unless the UTF-8 encoded tag name passes
+    ``_pyHtmlNameIsValid()``.  Returns 0 on success.
+    """
+    if _pyHtmlNameIsValid(tag_utf):
+        return 0
+    # the error message shows the name decoded back to text
+    raise ValueError(f"Invalid HTML tag name {(<bytes>tag_utf).decode('utf8')!r}")
+
+cdef int _attributeValidOrRaise(name_utf) except -1:
+    """Raise ValueError unless the UTF-8 encoded attribute name passes
+    ``_pyXmlNameIsValid()``.  Returns 0 on success.
+    """
+    if _pyXmlNameIsValid(name_utf):
+        return 0
+    # the error message shows the name decoded back to text
+    raise ValueError(f"Invalid attribute name {(<bytes>name_utf).decode('utf8')!r}")
+
+cdef int _prefixValidOrRaise(tag_utf) except -1:
+    """Raise ValueError unless the UTF-8 encoded namespace prefix passes
+    ``_pyXmlNameIsValid()``.  Returns 0 on success.
+    """
+    if _pyXmlNameIsValid(tag_utf):
+        return 0
+    # the error message shows the prefix decoded back to text
+    raise ValueError(f"Invalid namespace prefix {(<bytes>tag_utf).decode('utf8')!r}")
+
+cdef int _uriValidOrRaise(uri_utf) except -1:
+    """Raise ValueError unless libxml2 can parse the UTF-8 encoded namespace URI.
+
+    The URI is handed to ``xmlParseURI()``; a NULL result is treated as
+    "invalid".  Returns 0 on success.
+    """
+    cdef uri.xmlURI* c_uri = uri.xmlParseURI(_cstr(uri_utf))
+    if c_uri is not NULL:
+        # only the parse outcome matters, so release the parsed structure again
+        uri.xmlFreeURI(c_uri)
+        return 0
+    raise ValueError(f"Invalid namespace URI {(<bytes>uri_utf).decode('utf8')!r}")
+
+cdef inline unicode _namespacedName(xmlNode* c_node):
+    """Build the namespace-qualified name of a node from the result of
+    ``_getNs(c_node)`` and the node's own ``name`` field.
+    """
+    cdef const_xmlChar* c_href = _getNs(c_node)
+    cdef const_xmlChar* c_name = c_node.name
+    return _namespacedNameFromNsName(c_href, c_name)
+
+
+cdef unicode _namespacedNameFromNsName(const_xmlChar* c_href, const_xmlChar* c_name):
+    """Combine a namespace URI and a local name into ``{href}name`` notation.
+
+    If ``c_href`` is NULL, the plain name is returned without any braces.
+    Both C strings are converted with ``funicode()``.
+    """
+    name = funicode(c_name)
+    if c_href is not NULL:
+        href = funicode(c_href)
+        # doubled braces in the f-string produce literal '{' and '}'
+        return f"{{{href}}}{name}"
+    return name
+
+
+cdef _getFilenameForFile(source):
+    """Given a Python File or Gzip object, give filename back.
+
+    Three lookups are tried in order: ``source.geturl()``, ``source.name``
+    and ``source.filename``.  The result of ``geturl()`` is returned as is;
+    the two attributes are only used if they hold a string, which is then
+    passed through ``os_path_abspath()``.
+
+    Returns None if not a file object.
+
+    Failures of the individual lookups are ignored on purpose (best effort).
+    Only ``Exception`` subclasses are swallowed, so KeyboardInterrupt,
+    SystemExit and GeneratorExit still propagate to the caller.
+    """
+    # urllib2 provides a geturl() method
+    try:
+        return source.geturl()
+    except Exception:
+        pass
+    # file instances have a name attribute
+    try:
+        filename = source.name
+        if _isString(filename):
+            return os_path_abspath(filename)
+    except Exception:
+        pass
+    # gzip file instances have a filename attribute (before Py3k)
+    try:
+        filename = source.filename
+        if _isString(filename):
+            return os_path_abspath(filename)
+    except Exception:
+        pass
+    # can't determine filename
+    return None
author	S. Solomon Darnell	2025-03-28 21:52:21 -0500
committer	S. Solomon Darnell	2025-03-28 21:52:21 -0500
commit	4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree	ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi
parent	cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download	gn-ai-master.tar.gz