about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/lxml/objectpath.pxi
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/objectpath.pxi')
-rw-r--r-- .venv/lib/python3.12/site-packages/lxml/objectpath.pxi 332
1 files changed, 332 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/objectpath.pxi b/.venv/lib/python3.12/site-packages/lxml/objectpath.pxi
new file mode 100644
index 00000000..e562a365
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/objectpath.pxi
@@ -0,0 +1,332 @@
+################################################################################
+# ObjectPath
+
+# One compiled path segment in C form; filled in by
+# _build_object_path_segments() from a (ns, name, index) tuple.
+ctypedef struct _ObjectPath:
+    # namespace of the segment; NULL when the tuple's namespace was None
+    const_xmlChar* href
+    # tag name of the segment; NULL when the tuple's name was None
+    # (as in the relative root segment (None, None, 0))
+    const_xmlChar* name
+    # sibling index taken from the segment's '[n]' suffix, 0 if none was given
+    Py_ssize_t index
+
+
+# Sentinel meaning "the caller passed no default value".  It is compared by
+# identity, so None stays usable as an explicit default.
+cdef object _NO_DEFAULT = object()
+
+
+cdef class ObjectPath:
+    """ObjectPath(path)
+    Immutable object that represents a compiled object path.
+
+    Example for a path: 'root.child[1].{other}child[25]'
+
+    'path' is either a path string or an iterable of path segment strings.
+    """
+    # alias of __call__, bound once in __init__
+    cdef readonly object find
+    # parsed (ns, name, index) tuples; kept on the instance next to _c_path
+    cdef list _path
+    # string form of the path as returned by str()
+    cdef object _path_str
+    # C array with one _ObjectPath entry per tuple in _path
+    cdef _ObjectPath*  _c_path
+    # number of entries in _c_path
+    cdef Py_ssize_t _path_len
+    def __init__(self, path):
+        if python._isString(path):
+            self._path = _parse_object_path_string(path)
+            self._path_str = path
+        else:
+            # The segments are consumed twice below (parse, then join), so
+            # materialise them first; a one-shot iterator would otherwise
+            # leave an empty string representation behind.
+            path = list(path)
+            self._path = _parse_object_path_list(path)
+            self._path_str = '.'.join(path)
+        self._path_len = len(self._path)
+        self._c_path = _build_object_path_segments(self._path)
+        self.find = self.__call__
+
+    def __dealloc__(self):
+        # _c_path stays NULL if __init__ failed before building the array
+        if self._c_path is not NULL:
+            python.lxml_free(self._c_path)
+
+    def __str__(self):
+        return self._path_str
+
+    def __call__(self, _Element root not None, *_default):
+        """Follow the attribute path in the object structure and return the
+        target attribute value.
+
+        If it is not found, either returns a default value (if one was passed
+        as second argument) or raises AttributeError.
+
+        If the root element does not match the first path segment, the
+        default value is returned if one was passed, otherwise ValueError is
+        raised.  Passing more than one default raises TypeError.
+        """
+        if len(_default) > 1:
+            raise TypeError("invalid number of arguments: needs one or two")
+        default = _default[0] if _default else _NO_DEFAULT
+        return _find_object_path(root, self._c_path, self._path_len, default)
+
+    def hasattr(self, _Element root not None):
+        """hasattr(self, root)
+
+        Return True if the path can be resolved starting at 'root' and False
+        if the lookup raises AttributeError.  A ValueError raised because the
+        root element does not match the path is not caught here.
+        """
+        try:
+            _find_object_path(root, self._c_path, self._path_len, _NO_DEFAULT)
+        except AttributeError:
+            return False
+        return True
+
+    def setattr(self, _Element root not None, value):
+        """setattr(self, root, value)
+
+        Set the value of the target element in a subtree.
+
+        If any of the children on the path does not exist, it is created.
+        """
+        # replace=1: an existing target element is replaced
+        _create_object_path(root, self._c_path, self._path_len, 1, value)
+
+    def addattr(self, _Element root not None, value):
+        """addattr(self, root, value)
+
+        Append a value to the target element in a subtree.
+
+        If any of the children on the path does not exist, it is created.
+        """
+        # replace=0: the value is added next to an existing target element
+        _create_object_path(root, self._c_path, self._path_len, 0, value)
+
+
+# Matcher for a single path segment, applied at a given string offset.
+# Capture groups, in order:
+#   1. the leading '.' separator (empty string if absent)
+#   2. the namespace from an optional '{ns}' prefix
+#   3. the tag name (no '.', braces, brackets or whitespace)
+#   4. the index from an optional '[n]' suffix (digits and '-' only)
+cdef object __MATCH_PATH_SEGMENT = re.compile(
+    r"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?",
+    re.U).match
+
+# (ns, name, index) placeholder used as first segment of paths that start
+# with '.', i.e. paths that do not name the root element.
+cdef tuple _RELATIVE_PATH_SEGMENT = (None, None, 0)
+
+
+cdef list _parse_object_path_string(_path):
+    """Compile a dotted path string into a list of (ns, name, index) tuples.
+
+    Byte strings are decoded as ASCII, other non-unicode objects are converted
+    with unicode().  A path starting with '.' gets _RELATIVE_PATH_SEGMENT as
+    its first entry.  Raises ValueError for empty or malformed paths and for
+    an index on an explicitly named root segment.
+    """
+    cdef bint leading_dot
+    cdef unicode text
+    if isinstance(_path, bytes):
+        text = (<bytes>_path).decode('ascii')
+    elif type(_path) is unicode:
+        text = _path
+    else:
+        text = unicode(_path)
+    text = text.strip()
+    if text == '.':
+        return [_RELATIVE_PATH_SEGMENT]
+
+    segments = []
+    offset = 0
+    while text:
+        found = __MATCH_PATH_SEGMENT(text, offset)
+        if found is None:
+            # end of string reached, or garbage that the final check rejects
+            break
+
+        separator, href, tag, position = found.groups()
+        position = int(position) if position else 0
+        leading_dot = separator == '.'
+        if segments:
+            # every segment after the first must be introduced by a '.'
+            if not leading_dot:
+                raise ValueError("invalid path")
+        elif leading_dot:
+            # path '.child' => ignore root
+            segments.append(_RELATIVE_PATH_SEGMENT)
+        elif position:
+            raise ValueError("index not allowed on root node")
+
+        if href is not None:
+            href = python.PyUnicode_AsUTF8String(href)
+        tag = python.PyUnicode_AsUTF8String(tag)
+        segments.append( (href, tag, position) )
+        offset = found.end()
+
+    # nothing parsed at all, or unparsed text left behind the last segment
+    if not segments or offset < len(text):
+        raise ValueError("invalid path")
+    return segments
+
+
+cdef list _parse_object_path_list(path):
+    """Compile an iterable of path segment strings into a list of
+    (ns, name, index) tuples.
+
+    An empty first segment stands for the relative root and becomes
+    (None, None, 0).  Raises ValueError for an empty path, an unterminated
+    '[' index, or a non-zero index on the first segment.
+    """
+    segments = []
+    for entry in path:
+        entry = entry.strip()
+        if not segments and entry == '':
+            # path '.child' => ignore root
+            segments.append( (None, None, 0) )
+            continue
+
+        href, tag = cetree.getNsTag(entry)
+        c_tag = _xcstr(tag)
+        bracket = tree.xmlStrchr(c_tag, c'[')
+        if bracket is NULL:
+            position = 0
+        else:
+            closing = tree.xmlStrchr(bracket + 1, c']')
+            if closing is NULL:
+                raise ValueError("index must be enclosed in []")
+            # the characters strictly between '[' and ']'
+            position = int(bracket[1:closing - bracket])
+            if not segments and position != 0:
+                raise ValueError("index not allowed on root node")
+            # cut the '[...]' suffix off the tag name
+            tag = <bytes>c_tag[:bracket - c_tag]
+        segments.append( (href, tag, position) )
+
+    if not segments:
+        raise ValueError("invalid path")
+    return segments
+
+
+cdef _ObjectPath* _build_object_path_segments(list path_list) except NULL:
+    """Convert a list of (ns, name, index) tuples into a newly allocated C
+    array of _ObjectPath structs, one entry per tuple.
+
+    The caller owns the returned array and must release it with
+    python.lxml_free().  The href/name pointers are obtained from the tuple
+    members via _xcstr() - presumably they point into those bytes objects, so
+    'path_list' has to stay alive as long as the array is used (TODO confirm).
+
+    Raises MemoryError if the allocation fails.  Any exception raised while
+    filling the array (e.g. OverflowError for an index that does not fit into
+    Py_ssize_t) is propagated after the array has been freed.
+    """
+    cdef _ObjectPath* c_path
+    cdef _ObjectPath* c_path_segments
+    c_path_segments = <_ObjectPath*>python.lxml_malloc(len(path_list), sizeof(_ObjectPath))
+    if c_path_segments is NULL:
+        raise MemoryError()
+    c_path = c_path_segments
+    try:
+        for href, name, index in path_list:
+            c_path[0].href = _xcstr(href) if href is not None else NULL
+            c_path[0].name = _xcstr(name) if name is not None else NULL
+            # Python int -> Py_ssize_t conversion, may raise OverflowError
+            c_path[0].index = index
+            c_path += 1
+    except:
+        # nobody else knows about the array yet, so free it before re-raising
+        python.lxml_free(c_path_segments)
+        raise
+    return c_path_segments
+
+
+cdef _find_object_path(_Element root, _ObjectPath* c_path, Py_ssize_t c_path_len, default_value):
+    """Follow the path to find the target element.
+
+    Walks the 'c_path_len' segments of 'c_path' starting at 'root' and
+    returns the element at the end of the path.
+
+    If 'root' does not match the first segment, 'default_value' is returned
+    when one was given (i.e. it is not _NO_DEFAULT), otherwise ValueError is
+    raised.  If a later segment cannot be resolved, 'default_value' is
+    returned when one was given, otherwise AttributeError is raised.
+    """
+    cdef tree.xmlNode* c_node
+    cdef Py_ssize_t c_index
+    # first segment: must match the root element itself
+    c_node = root._c_node
+    c_name = c_path[0].name
+    c_href = c_path[0].href
+    if c_href is NULL or c_href[0] == c'\0':
+        # no (or empty) namespace in the path: use the root's own namespace
+        c_href = tree._getNs(c_node)
+    if not cetree.tagMatches(c_node, c_href, c_name):
+        if default_value is not _NO_DEFAULT:
+            return default_value
+        else:
+            raise ValueError(
+                f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")
+
+    # remaining segments: descend one child level per iteration
+    while c_node is not NULL:
+        c_path_len -= 1
+        if c_path_len <= 0:
+            # all segments consumed, c_node is the target
+            break
+
+        c_path += 1
+        if c_path[0].href is not NULL:
+            c_href = c_path[0].href # otherwise: keep parent namespace
+        # look the name up in the document's string dictionary
+        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
+        if c_name is NULL:
+            # name not in the dictionary: give up the search, but keep the
+            # path's own name for the error message below
+            # NOTE(review): presumably no node of this document can carry a
+            # name that is missing from its dictionary - confirm
+            c_name = c_path[0].name
+            c_node = NULL
+            break
+        c_index = c_path[0].index
+        # negative indexes start at the last child, all others at the first
+        c_node = c_node.last if c_index < 0 else c_node.children
+        c_node = _findFollowingSibling(c_node, c_href, c_name, c_index)
+
+    if c_node is not NULL:
+        return cetree.elementFactory(root._doc, c_node)
+    elif default_value is not _NO_DEFAULT:
+        return default_value
+    else:
+        tag = cetree.namespacedNameFromNsName(c_href, c_name)
+        raise AttributeError, f"no such child: {tag}"
+
+
+cdef _create_object_path(_Element root, _ObjectPath* c_path,
+                         Py_ssize_t c_path_len, int replace, value):
+    """Follow the path to find the target element, build the missing children
+    as needed and set the target element to 'value'.  If replace is true, an
+    existing value is replaced, otherwise the new value is added.
+
+    Raises TypeError if the path consists of a single segment (the root) or
+    if a missing child would have to be created for a segment with a
+    non-zero index.  Raises ValueError if 'root' does not match the first
+    path segment.
+    """
+    cdef _Element child
+    cdef tree.xmlNode* c_node
+    cdef tree.xmlNode* c_child
+    cdef Py_ssize_t c_index
+    if c_path_len == 1:
+        raise TypeError, "cannot update root node"
+
+    # first segment: must match the root element itself
+    c_node = root._c_node
+    c_name = c_path[0].name
+    c_href = c_path[0].href
+    if c_href is NULL or c_href[0] == c'\0':
+        # no (or empty) namespace in the path: use the root's own namespace
+        c_href = tree._getNs(c_node)
+    if not cetree.tagMatches(c_node, c_href, c_name):
+        raise ValueError(
+            f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}")
+
+    # remaining segments: descend one level per iteration, creating children
+    # where they are missing
+    while c_path_len > 1:
+        c_path_len -= 1
+        c_path += 1
+        if c_path[0].href is not NULL:
+            c_href = c_path[0].href # otherwise: keep parent namespace
+        c_index = c_path[0].index
+        # look the name up in the document's string dictionary
+        c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1)
+        if c_name is NULL:
+            # name not in the dictionary: skip the child search and treat the
+            # child as missing
+            c_name = c_path[0].name
+            c_child = NULL
+        else:
+            # negative indexes start at the last child, all others at the first
+            c_child = c_node.last if c_index < 0 else c_node.children
+            c_child = _findFollowingSibling(c_child, c_href, c_name, c_index)
+
+        if c_child is not NULL:
+            c_node = c_child
+        elif c_index != 0:
+            raise TypeError, "creating indexed path attributes is not supported"
+        elif c_path_len == 1:
+            # last segment is missing: add the value below the current node
+            # and finish
+            _appendValue(cetree.elementFactory(root._doc, c_node),
+                         cetree.namespacedNameFromNsName(c_href, c_name),
+                         value)
+            return
+        else:
+            # intermediate segment is missing: create it and continue below it
+            child = cetree.makeSubElement(
+                cetree.elementFactory(root._doc, c_node),
+                cetree.namespacedNameFromNsName(c_href, c_name),
+                None, None, None, None)
+            c_node = child._c_node
+
+    # if we get here, the entire path was already there
+    if replace:
+        element = cetree.elementFactory(root._doc, c_node)
+        _replaceElement(element, value)
+    else:
+        # add the value to the target's parent under the target's own tag
+        _appendValue(cetree.elementFactory(root._doc, c_node.parent),
+                     cetree.namespacedName(c_node), value)
+
+
+cdef list _build_descendant_paths(tree.xmlNode* c_node, prefix_string):
+    """Collect the object paths of 'c_node' and all of its descendants.
+
+    The first path is the tag of 'c_node', prefixed by 'prefix_string' if it
+    is non-empty (a '.' separator is inserted unless the prefix already ends
+    with one).
+    """
+    cdef list stack, collected
+    tag = cetree.namespacedName(c_node)
+    if not prefix_string:
+        start = tag
+    elif prefix_string[-1] == '.':
+        start = prefix_string + tag
+    else:
+        start = prefix_string + '.' + tag
+    stack = [start]
+    collected = []
+    _recursive_build_descendant_paths(c_node, stack, collected)
+    return collected
+
+
+cdef int _recursive_build_descendant_paths(tree.xmlNode* c_node,
+                                           list path, list path_list) except -1:
+    """Append the path of 'c_node' and of every descendant element to
+    'path_list'.  'path' holds the segments leading to 'c_node' and is
+    extended and restored while recursing.
+    """
+    cdef tree.xmlNode* c_child
+    path_list.append('.'.join(path))
+    # number of children seen so far, per tag
+    seen = {}
+    c_href = tree._getNs(c_node)
+    c_child = c_node.children
+    while c_child is not NULL:
+        if c_child.type != tree.XML_ELEMENT_NODE:
+            # only element nodes contribute path segments
+            c_child = c_child.next
+            continue
+
+        if c_href is tree._getNs(c_child):
+            # same namespace as the parent: plain tag name is enough
+            tag = pyunicode(c_child.name)
+        elif c_href is not NULL and tree._getNs(c_child) is NULL:
+            # special case: parent has namespace, child does not
+            tag = '{}' + pyunicode(c_child.name)
+        else:
+            tag = cetree.namespacedName(c_child)
+
+        count = seen.get(tag, 0)
+        seen[tag] = count + 1
+        if count:
+            # repeated tag: the n-th sibling (counting from 0) gets '[n]'
+            tag = f'{tag}[{count}]'
+
+        path.append(tag)
+        _recursive_build_descendant_paths(c_child, path, path_list)
+        path.pop()
+        c_child = c_child.next
+    return 0