diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi')
-rw-r--r-- | .venv/lib/python3.12/site-packages/lxml/readonlytree.pxi | 565 |
1 files changed, 565 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi b/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi new file mode 100644 index 00000000..9bc9a660 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi @@ -0,0 +1,565 @@ +# read-only tree implementation + +@cython.internal +cdef class _ReadOnlyProxy: + "A read-only proxy class suitable for PIs/Comments (for internal use only!)." + cdef bint _free_after_use + cdef xmlNode* _c_node + cdef _ReadOnlyProxy _source_proxy + cdef list _dependent_proxies + def __cinit__(self): + self._c_node = NULL + self._free_after_use = 0 + + cdef int _assertNode(self) except -1: + """This is our way of saying: this proxy is invalid! + """ + if not self._c_node: + raise ReferenceError("Proxy invalidated!") + return 0 + + cdef int _raise_unsupported_type(self) except -1: + raise TypeError(f"Unsupported node type: {self._c_node.type}") + + cdef void free_after_use(self) noexcept: + """Should the xmlNode* be freed when releasing the proxy? + """ + self._free_after_use = 1 + + @property + def tag(self): + """Element tag + """ + self._assertNode() + if self._c_node.type == tree.XML_ELEMENT_NODE: + return _namespacedName(self._c_node) + elif self._c_node.type == tree.XML_PI_NODE: + return ProcessingInstruction + elif self._c_node.type == tree.XML_COMMENT_NODE: + return Comment + elif self._c_node.type == tree.XML_ENTITY_REF_NODE: + return Entity + else: + self._raise_unsupported_type() + + @property + def text(self): + """Text before the first subelement. This is either a string or + the value None, if there was no text. + """ + self._assertNode() + if self._c_node.type == tree.XML_ELEMENT_NODE: + return _collectText(self._c_node.children) + elif self._c_node.type in (tree.XML_PI_NODE, + tree.XML_COMMENT_NODE): + if self._c_node.content is NULL: + return '' + else: + return funicode(self._c_node.content) + elif self._c_node.type == tree.XML_ENTITY_REF_NODE: + return f'&{funicode(self._c_node.name)};' + else: + self._raise_unsupported_type() + + @property + def tail(self): + """Text after this element's end tag, but before the next sibling + element's start tag. This is either a string or the value None, if + there was no text. + """ + self._assertNode() + return _collectText(self._c_node.next) + + @property + def sourceline(self): + """Original line number as found by the parser or None if unknown. + """ + cdef long line + self._assertNode() + line = tree.xmlGetLineNo(self._c_node) + if line > 0: + return line + else: + return None + + def __repr__(self): + self._assertNode() + if self._c_node.type == tree.XML_ELEMENT_NODE: + return "<Element %s at 0x%x>" % (self.tag, id(self)) + elif self._c_node.type == tree.XML_COMMENT_NODE: + return "<!--%s-->" % self.text + elif self._c_node.type == tree.XML_ENTITY_NODE: + return "&%s;" % funicode(self._c_node.name) + elif self._c_node.type == tree.XML_PI_NODE: + text = self.text + if text: + return "<?%s %s?>" % (self.target, text) + else: + return "<?%s?>" % self.target + else: + self._raise_unsupported_type() + + def __getitem__(self, x): + """Returns the subelement at the given position or the requested + slice. + """ + cdef xmlNode* c_node = NULL + cdef Py_ssize_t step = 0, slicelength = 0 + cdef Py_ssize_t c, i + cdef _node_to_node_function next_element + cdef list result + self._assertNode() + if isinstance(x, slice): + # slicing + if _isFullSlice(<slice>x): + return _collectChildren(self) + _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength) + if c_node is NULL: + return [] + if step > 0: + next_element = _nextElement + else: + step = -step + next_element = _previousElement + result = [] + c = 0 + while c_node is not NULL and c < slicelength: + result.append(_newReadOnlyProxy(self._source_proxy, c_node)) + result.append(_elementFactory(self._doc, c_node)) + c = c + 1 + for i from 0 <= i < step: + c_node = next_element(c_node) + return result + else: + # indexing + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, "list index out of range" + return _newReadOnlyProxy(self._source_proxy, c_node) + + def __len__(self): + """Returns the number of subelements. + """ + cdef Py_ssize_t c + cdef xmlNode* c_node + self._assertNode() + c = 0 + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + c = c + 1 + c_node = c_node.next + return c + + def __bool__(self): + cdef xmlNode* c_node + self._assertNode() + c_node = _findChildBackwards(self._c_node, 0) + return c_node != NULL + + def __deepcopy__(self, memo): + "__deepcopy__(self, memo)" + return self.__copy__() + + cpdef __copy__(self): + "__copy__(self)" + cdef xmlDoc* c_doc + cdef xmlNode* c_node + cdef _Document new_doc + if self._c_node is NULL: + return self + c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive + new_doc = _documentFactory(c_doc, None) + root = new_doc.getroot() + if root is not None: + return root + # Comment/PI + c_node = c_doc.children + while c_node is not NULL and c_node.type != self._c_node.type: + c_node = c_node.next + if c_node is NULL: + return None + return _elementFactory(new_doc, c_node) + + def __iter__(self): + return iter(self.getchildren()) + + def iterchildren(self, tag=None, *, reversed=False): + """iterchildren(self, tag=None, reversed=False) + + Iterate over the children of this element. + """ + children = self.getchildren() + if tag is not None and tag != '*': + children = [ el for el in children if el.tag == tag ] + if reversed: + children = children[::-1] + return iter(children) + + cpdef getchildren(self): + """Returns all subelements. The elements are returned in document + order. + """ + cdef xmlNode* c_node + cdef list result + self._assertNode() + result = [] + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + result.append(_newReadOnlyProxy(self._source_proxy, c_node)) + c_node = c_node.next + return result + + def getparent(self): + """Returns the parent of this element or None for the root element. + """ + cdef xmlNode* c_parent + self._assertNode() + c_parent = self._c_node.parent + if c_parent is NULL or not tree._isElement(c_parent): + return None + else: + return _newReadOnlyProxy(self._source_proxy, c_parent) + + def getnext(self): + """Returns the following sibling of this element or None. + """ + cdef xmlNode* c_node + self._assertNode() + c_node = _nextElement(self._c_node) + if c_node is not NULL: + return _newReadOnlyProxy(self._source_proxy, c_node) + return None + + def getprevious(self): + """Returns the preceding sibling of this element or None. + """ + cdef xmlNode* c_node + self._assertNode() + c_node = _previousElement(self._c_node) + if c_node is not NULL: + return _newReadOnlyProxy(self._source_proxy, c_node) + return None + + +@cython.final +@cython.internal +cdef class _ReadOnlyPIProxy(_ReadOnlyProxy): + """A read-only proxy for processing instructions (for internal use only!)""" + @property + def target(self): + self._assertNode() + return funicode(self._c_node.name) + +@cython.final +@cython.internal +cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy): + """A read-only proxy for entity references (for internal use only!)""" + property name: + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value_utf = _utf8(value) + if '&' in value or ';' in value: + raise ValueError(f"Invalid entity name '{value}'") + tree.xmlNodeSetName(self._c_node, _xcstr(value_utf)) + + @property + def text(self): + return f'&{funicode(self._c_node.name)};' + + +@cython.internal +cdef class _ReadOnlyElementProxy(_ReadOnlyProxy): + """The main read-only Element proxy class (for internal use only!).""" + + @property + def attrib(self): + self._assertNode() + return dict(_collectAttributes(self._c_node, 3)) + + @property + def prefix(self): + """Namespace prefix or None. + """ + self._assertNode() + if self._c_node.ns is not NULL: + if self._c_node.ns.prefix is not NULL: + return funicode(self._c_node.ns.prefix) + return None + + @property + def nsmap(self): + """Namespace prefix->URI mapping known in the context of this + Element. This includes all namespace declarations of the + parents. + + Note that changing the returned dict has no effect on the Element. + """ + self._assertNode() + return _build_nsmap(self._c_node) + + def get(self, key, default=None): + """Gets an element attribute. + """ + self._assertNode() + return _getNodeAttributeValue(self._c_node, key, default) + + def keys(self): + """Gets a list of attribute names. The names are returned in an + arbitrary order (just like for an ordinary Python dictionary). + """ + self._assertNode() + return _collectAttributes(self._c_node, 1) + + def values(self): + """Gets element attributes, as a sequence. The attributes are returned + in an arbitrary order. + """ + self._assertNode() + return _collectAttributes(self._c_node, 2) + + def items(self): + """Gets element attributes, as a sequence. The attributes are returned + in an arbitrary order. + """ + self._assertNode() + return _collectAttributes(self._c_node, 3) + +cdef _ReadOnlyProxy _newReadOnlyProxy( + _ReadOnlyProxy source_proxy, xmlNode* c_node): + cdef _ReadOnlyProxy el + if c_node.type == tree.XML_ELEMENT_NODE: + el = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy) + elif c_node.type == tree.XML_PI_NODE: + el = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy) + elif c_node.type in (tree.XML_COMMENT_NODE, + tree.XML_ENTITY_REF_NODE): + el = _ReadOnlyProxy.__new__(_ReadOnlyProxy) + else: + raise TypeError(f"Unsupported element type: {c_node.type}") + el._c_node = c_node + _initReadOnlyProxy(el, source_proxy) + return el + +cdef inline _initReadOnlyProxy(_ReadOnlyProxy el, + _ReadOnlyProxy source_proxy): + if source_proxy is None: + el._source_proxy = el + el._dependent_proxies = [el] + else: + el._source_proxy = source_proxy + source_proxy._dependent_proxies.append(el) + +cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy): + cdef xmlNode* c_node + cdef _ReadOnlyProxy el + if sourceProxy is None: + return + if sourceProxy._dependent_proxies is None: + return + for el in sourceProxy._dependent_proxies: + c_node = el._c_node + el._c_node = NULL + if el._free_after_use: + tree.xmlFreeNode(c_node) + del sourceProxy._dependent_proxies[:] + +# opaque wrapper around non-element nodes, e.g. the document node +# +# This class does not imply any restrictions on modifiability or +# read-only status of the node, so use with caution. + +@cython.internal +cdef class _OpaqueNodeWrapper: + cdef tree.xmlNode* _c_node + def __init__(self): + raise TypeError, "This type cannot be instantiated from Python" + +@cython.final +@cython.internal +cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper): + cdef int _assertNode(self) except -1: + """This is our way of saying: this proxy is invalid! + """ + assert self._c_node is not NULL, "Proxy invalidated!" + return 0 + + cpdef append(self, other_element): + """Append a copy of an Element to the list of children. + """ + cdef xmlNode* c_next + cdef xmlNode* c_node + self._assertNode() + c_node = _roNodeOf(other_element) + if c_node.type == tree.XML_ELEMENT_NODE: + if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL: + raise ValueError, "cannot append, document already has a root element" + elif c_node.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE): + raise TypeError, f"unsupported element type for top-level node: {c_node.type}" + c_node = _copyNodeToDoc(c_node, <tree.xmlDoc*>self._c_node) + c_next = c_node.next + tree.xmlAddChild(self._c_node, c_node) + _moveTail(c_next, c_node) + + def extend(self, elements): + """Append a copy of all Elements from a sequence to the list of + children. + """ + self._assertNode() + for element in elements: + self.append(element) + +cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node): + cdef _OpaqueNodeWrapper node + if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE): + node = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper) + else: + node = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper) + node._c_node = c_node + return node + +# element proxies that allow restricted modification + +@cython.internal +cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy): + """A read-only proxy that allows changing the text content. + """ + property text: + def __get__(self): + self._assertNode() + if self._c_node.content is NULL: + return '' + else: + return funicode(self._c_node.content) + + def __set__(self, value): + cdef tree.xmlDict* c_dict + self._assertNode() + if value is None: + c_text = <const_xmlChar*>NULL + else: + value = _utf8(value) + c_text = _xcstr(value) + tree.xmlNodeSetContent(self._c_node, c_text) + +@cython.final +@cython.internal +cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy): + """A read-only proxy that allows changing the text/target content of a + processing instruction. + """ + property target: + def __get__(self): + self._assertNode() + return funicode(self._c_node.name) + + def __set__(self, value): + self._assertNode() + value = _utf8(value) + c_text = _xcstr(value) + tree.xmlNodeSetName(self._c_node, c_text) + +@cython.final +@cython.internal +cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy): + "A read-only proxy for entity references (for internal use only!)" + property name: + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value = _utf8(value) + assert '&' not in value and ';' not in value, \ + f"Invalid entity name '{value}'" + c_text = _xcstr(value) + tree.xmlNodeSetName(self._c_node, c_text) + + +@cython.final +@cython.internal +cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy): + """A read-only element that allows adding children and changing the + text content (i.e. everything that adds to the subtree). + """ + cpdef append(self, other_element): + """Append a copy of an Element to the list of children. + """ + cdef xmlNode* c_next + cdef xmlNode* c_node + self._assertNode() + c_node = _roNodeOf(other_element) + c_node = _copyNodeToDoc(c_node, self._c_node.doc) + c_next = c_node.next + tree.xmlAddChild(self._c_node, c_node) + _moveTail(c_next, c_node) + + def extend(self, elements): + """Append a copy of all Elements from a sequence to the list of + children. + """ + self._assertNode() + for element in elements: + self.append(element) + + property text: + """Text before the first subelement. This is either a string or the + value None, if there was no text. + """ + def __get__(self): + self._assertNode() + return _collectText(self._c_node.children) + + def __set__(self, value): + self._assertNode() + if isinstance(value, QName): + value = _resolveQNameText(self, value).decode('utf8') + _setNodeText(self._c_node, value) + + +cdef _ReadOnlyProxy _newAppendOnlyProxy( + _ReadOnlyProxy source_proxy, xmlNode* c_node): + cdef _ReadOnlyProxy el + if c_node.type == tree.XML_ELEMENT_NODE: + el = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy) + elif c_node.type == tree.XML_PI_NODE: + el = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy) + elif c_node.type == tree.XML_COMMENT_NODE: + el = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy) + else: + raise TypeError(f"Unsupported element type: {c_node.type}") + el._c_node = c_node + _initReadOnlyProxy(el, source_proxy) + return el + +cdef xmlNode* _roNodeOf(element) except NULL: + cdef xmlNode* c_node + if isinstance(element, _Element): + c_node = (<_Element>element)._c_node + elif isinstance(element, _ReadOnlyProxy): + c_node = (<_ReadOnlyProxy>element)._c_node + elif isinstance(element, _OpaqueNodeWrapper): + c_node = (<_OpaqueNodeWrapper>element)._c_node + else: + raise TypeError, f"invalid argument type {type(element)}" + + if c_node is NULL: + raise TypeError, "invalid element" + return c_node + +cdef xmlNode* _nonRoNodeOf(element) except NULL: + cdef xmlNode* c_node + if isinstance(element, _Element): + c_node = (<_Element>element)._c_node + elif isinstance(element, _AppendOnlyElementProxy): + c_node = (<_AppendOnlyElementProxy>element)._c_node + elif isinstance(element, _OpaqueNodeWrapper): + c_node = (<_OpaqueNodeWrapper>element)._c_node + else: + raise TypeError, f"invalid argument type {type(element)}" + + if c_node is NULL: + raise TypeError, "invalid element" + return c_node |