From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../python3.12/site-packages/lxml/objectify.pyx | 2145 ++++++++++++++++++++ 1 file changed, 2145 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/lxml/objectify.pyx (limited to '.venv/lib/python3.12/site-packages/lxml/objectify.pyx') diff --git a/.venv/lib/python3.12/site-packages/lxml/objectify.pyx b/.venv/lib/python3.12/site-packages/lxml/objectify.pyx new file mode 100644 index 00000000..0ff92226 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/objectify.pyx @@ -0,0 +1,2145 @@ +# cython: binding=True +# cython: auto_pickle=False +# cython: language_level=3 + +""" +The ``lxml.objectify`` module implements a Python object API for XML. +It is based on `lxml.etree`. +""" + +cimport cython + +from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup +from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode +from lxml.includes.tree cimport const_xmlChar, _xcstr +from lxml cimport python +from lxml.includes cimport tree + +cimport lxml.includes.etreepublic as cetree +cimport libc.string as cstring_h # not to be confused with stdlib 'string' +from libc.string cimport const_char + +__all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker', + 'FloatElement', 'IntElement', 'NoneElement', + 'NumberElement', 'ObjectPath', 'ObjectifiedDataElement', + 'ObjectifiedElement', 'ObjectifyElementClassLookup', + 'PYTYPE_ATTRIBUTE', 'PyType', 'StringElement', 'SubElement', + 'XML', 'annotate', 'deannotate', 'dump', 'enable_recursive_str', + 'fromstring', 'getRegisteredTypes', 'makeparser', 'parse', + 'pyannotate', 'pytypename', 'set_default_parser', + 'set_pytype_attribute_tag', 'xsiannotate'] + +cdef object etree +from lxml import etree +# initialize C-API of lxml.etree +import_lxml__etree() + +__version__ = etree.__version__ + +cdef object _float_is_inf, _float_is_nan +from math import isinf as _float_is_inf, isnan as _float_is_nan + +cdef object re +import re + +cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError) +cdef object is_special_method = re.compile('__.*__$').match + + +cdef object _typename(object t): + cdef const_char* c_name + c_name = python._fqtypename(t) + s = cstring_h.strrchr(c_name, c'.') + if s is not NULL: + c_name = s + 1 + return pyunicode(c_name) + + +# namespace/name for "pytype" hint attribute +cdef object PYTYPE_NAMESPACE +cdef bytes PYTYPE_NAMESPACE_UTF8 +cdef const_xmlChar* _PYTYPE_NAMESPACE + +cdef object PYTYPE_ATTRIBUTE_NAME +cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8 +cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME + +PYTYPE_ATTRIBUTE = None + +cdef unicode TREE_PYTYPE_NAME = "TREE" + +cdef tuple _unicodeAndUtf8(s): + return s, python.PyUnicode_AsUTF8String(s) + +def set_pytype_attribute_tag(attribute_tag=None): + """set_pytype_attribute_tag(attribute_tag=None) + Change name and namespace of the XML attribute that holds Python type + information. + + Do not use this unless you know what you are doing. + + Reset by calling without argument. + + Default: "{http://codespeak.net/lxml/objectify/pytype}pytype" + """ + global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME + global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 + global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 + if attribute_tag is None: + PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \ + _unicodeAndUtf8("http://codespeak.net/lxml/objectify/pytype") + PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ + _unicodeAndUtf8("pytype") + else: + PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ + cetree.getNsTag(attribute_tag) + PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8') + PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8') + + _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8 + _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8 + PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( + _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + +set_pytype_attribute_tag() + + +# namespaces for XML Schema +cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 +XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \ + _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema") +cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8) + +cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 +XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \ + _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema-instance") +cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8) + +cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS +cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS + + +################################################################################ +# Element class for the main API + +cdef class ObjectifiedElement(ElementBase): + """Main XML Element class. + + Element children are accessed as object attributes. Multiple children + with the same name are available through a list index. Example:: + + >>> root = XML("01") + >>> second_c2 = root.c1.c2[1] + >>> print(second_c2.text) + 1 + + Note that you cannot (and must not) instantiate this class or its + subclasses. + """ + def __iter__(self): + """Iterate over self and all siblings with the same tag. + """ + parent = self.getparent() + if parent is None: + return iter([self]) + return etree.ElementChildIterator(parent, tag=self.tag) + + def __str__(self): + if __RECURSIVE_STR: + return _dump(self, 0) + else: + return textOf(self._c_node) or '' + + # pickle support for objectified Element + def __reduce__(self): + return fromstring, (etree.tostring(self),) + + @property + def text(self): + return textOf(self._c_node) + + @property + def __dict__(self): + """A fake implementation for __dict__ to support dir() etc. + + Note that this only considers the first child with a given name. + """ + cdef _Element child + cdef dict children + c_ns = tree._getNs(self._c_node) + tag = "{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None + children = {} + for child in etree.ElementChildIterator(self, tag=tag): + if c_ns is NULL and tree._getNs(child._c_node) is not NULL: + continue + name = pyunicode(child._c_node.name) + if name not in children: + children[name] = child + return children + + def __len__(self): + """Count self and siblings with the same tag. + """ + return _countSiblings(self._c_node) + + def countchildren(self): + """countchildren(self) + + Return the number of children of this element, regardless of their + name. + """ + # copied from etree + cdef Py_ssize_t c + cdef tree.xmlNode* c_node + c = 0 + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + c += 1 + c_node = c_node.next + return c + + def getchildren(self): + """getchildren(self) + + Returns a sequence of all direct children. The elements are + returned in document order. + """ + cdef tree.xmlNode* c_node + result = [] + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + result.append(cetree.elementFactory(self._doc, c_node)) + c_node = c_node.next + return result + + def __getattr__(self, tag): + """Return the (first) child with the given tag name. If no namespace + is provided, the child will be looked up in the same one as self. + """ + return _lookupChildOrRaise(self, tag) + + def __setattr__(self, tag, value): + """Set the value of the (first) child with the given tag name. If no + namespace is provided, the child will be looked up in the same one as + self. + """ + cdef _Element element + # properties are looked up /after/ __setattr__, so we must emulate them + if tag == 'text' or tag == 'pyval': + # read-only ! + raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable" + elif tag == 'tail': + cetree.setTailText(self._c_node, value) + return + elif tag == 'tag': + ElementBase.tag.__set__(self, value) + return + elif tag == 'base': + ElementBase.base.__set__(self, value) + return + tag = _buildChildTag(self, tag) + element = _lookupChild(self, tag) + if element is None: + _appendValue(self, tag, value) + else: + _replaceElement(element, value) + + def __delattr__(self, tag): + child = _lookupChildOrRaise(self, tag) + self.remove(child) + + def addattr(self, tag, value): + """addattr(self, tag, value) + + Add a child value to the element. + + As opposed to append(), it sets a data value, not an element. + """ + _appendValue(self, _buildChildTag(self, tag), value) + + def __getitem__(self, key): + """Return a sibling, counting from the first child of the parent. The + method behaves like both a dict and a sequence. + + * If argument is an integer, returns the sibling at that position. + + * If argument is a string, does the same as getattr(). This can be + used to provide namespaces for element lookup, or to look up + children with special names (``text`` etc.). + + * If argument is a slice object, returns the matching slice. + """ + cdef tree.xmlNode* c_self_node + cdef tree.xmlNode* c_parent + cdef tree.xmlNode* c_node + cdef Py_ssize_t c_index + if python._isString(key): + return _lookupChildOrRaise(self, key) + elif isinstance(key, slice): + return list(self)[key] + # normal item access + c_index = key # raises TypeError if necessary + c_self_node = self._c_node + c_parent = c_self_node.parent + if c_parent is NULL: + if c_index == 0 or c_index == -1: + return self + raise IndexError, unicode(key) + if c_index < 0: + c_node = c_parent.last + else: + c_node = c_parent.children + c_node = _findFollowingSibling( + c_node, tree._getNs(c_self_node), c_self_node.name, c_index) + if c_node is NULL: + raise IndexError, unicode(key) + return elementFactory(self._doc, c_node) + + def __setitem__(self, key, value): + """Set the value of a sibling, counting from the first child of the + parent. Implements key assignment, item assignment and slice + assignment. + + * If argument is an integer, sets the sibling at that position. + + * If argument is a string, does the same as setattr(). This is used + to provide namespaces for element lookup. + + * If argument is a sequence (list, tuple, etc.), assign the contained + items to the siblings. + """ + cdef _Element element + cdef tree.xmlNode* c_node + if python._isString(key): + key = _buildChildTag(self, key) + element = _lookupChild(self, key) + if element is None: + _appendValue(self, key, value) + else: + _replaceElement(element, value) + return + + if self._c_node.parent is NULL: + # the 'root[i] = ...' case + raise TypeError, "assignment to root element is invalid" + + if isinstance(key, slice): + # slice assignment + _setSlice(key, self, value) + else: + # normal index assignment + if key < 0: + c_node = self._c_node.parent.last + else: + c_node = self._c_node.parent.children + c_node = _findFollowingSibling( + c_node, tree._getNs(self._c_node), self._c_node.name, key) + if c_node is NULL: + raise IndexError, unicode(key) + element = elementFactory(self._doc, c_node) + _replaceElement(element, value) + + def __delitem__(self, key): + parent = self.getparent() + if parent is None: + raise TypeError, "deleting items not supported by root element" + if isinstance(key, slice): + # slice deletion + del_items = list(self)[key] + remove = parent.remove + for el in del_items: + remove(el) + else: + # normal index deletion + sibling = self.__getitem__(key) + parent.remove(sibling) + + def descendantpaths(self, prefix=None): + """descendantpaths(self, prefix=None) + + Returns a list of object path expressions for all descendants. + """ + if prefix is not None and not python._isString(prefix): + prefix = '.'.join(prefix) + return _build_descendant_paths(self._c_node, prefix) + + +cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name): + if c_node.name != c_name: + return 0 + if c_href == NULL: + return 1 + c_node_href = tree._getNs(c_node) + if c_node_href == NULL: + return c_href[0] == c'\0' + return tree.xmlStrcmp(c_node_href, c_href) == 0 + + +cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node): + cdef tree.xmlNode* c_node + cdef Py_ssize_t count + c_tag = c_start_node.name + c_href = tree._getNs(c_start_node) + count = 1 + c_node = c_start_node.next + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + _tagMatches(c_node, c_href, c_tag): + count += 1 + c_node = c_node.next + c_node = c_start_node.prev + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + _tagMatches(c_node, c_href, c_tag): + count += 1 + c_node = c_node.prev + return count + +cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, + const_xmlChar* href, const_xmlChar* name, + Py_ssize_t index): + cdef tree.xmlNode* (*next)(tree.xmlNode*) + if index >= 0: + next = cetree.nextElement + else: + index = -1 - index + next = cetree.previousElement + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + _tagMatches(c_node, href, name): + index = index - 1 + if index < 0: + return c_node + c_node = next(c_node) + return NULL + +cdef object _lookupChild(_Element parent, tag): + cdef tree.xmlNode* c_result + cdef tree.xmlNode* c_node + c_node = parent._c_node + ns, tag = cetree.getNsTagWithEmptyNs(tag) + c_tag = tree.xmlDictExists( + c_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag)) + if c_tag is NULL: + return None # not in the hash map => not in the tree + if ns is None: + # either inherit ns from parent or use empty (i.e. no) namespace + c_href = tree._getNs(c_node) or '' + else: + c_href = _xcstr(ns) + c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) + if c_result is NULL: + return None + return elementFactory(parent._doc, c_result) + +cdef object _lookupChildOrRaise(_Element parent, tag): + element = _lookupChild(parent, tag) + if element is None: + raise AttributeError, "no such child: " + _buildChildTag(parent, tag) + return element + +cdef object _buildChildTag(_Element parent, tag): + ns, tag = cetree.getNsTag(tag) + c_tag = _xcstr(tag) + c_href = tree._getNs(parent._c_node) if ns is None else _xcstr(ns) + return cetree.namespacedNameFromNsName(c_href, c_tag) + +cdef _replaceElement(_Element element, value): + cdef _Element new_element + if isinstance(value, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + element._doc, (<_Element>value)._c_node) + new_element.tag = element.tag + elif isinstance(value, (list, tuple)): + element[:] = value + return + else: + new_element = element.makeelement(element.tag) + _setElementValue(new_element, value) + element.getparent().replace(element, new_element) + +cdef _appendValue(_Element parent, tag, value): + cdef _Element new_element + if isinstance(value, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + parent._doc, (<_Element>value)._c_node) + new_element.tag = tag + cetree.appendChildToElement(parent, new_element) + elif isinstance(value, (list, tuple)): + for item in value: + _appendValue(parent, tag, item) + else: + new_element = cetree.makeElement( + tag, parent._doc, None, None, None, None, None) + _setElementValue(new_element, value) + cetree.appendChildToElement(parent, new_element) + +cdef _setElementValue(_Element element, value): + if value is None: + cetree.setAttributeValue( + element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") + elif isinstance(value, _Element): + _replaceElement(element, value) + return + else: + cetree.delAttributeFromNsName( + element._c_node, _XML_SCHEMA_INSTANCE_NS, "nil") + if python._isString(value): + pytype_name = "str" + py_type = _PYTYPE_DICT.get(pytype_name) + else: + pytype_name = _typename(value) + py_type = _PYTYPE_DICT.get(pytype_name) + if py_type is not None: + value = py_type.stringify(value) + else: + value = unicode(value) + if py_type is not None: + cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) + else: + cetree.delAttributeFromNsName( + element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + cetree.setNodeText(element._c_node, value) + +cdef _setSlice(sliceobject, _Element target, items): + cdef _Element parent + cdef tree.xmlNode* c_node + cdef Py_ssize_t c_step, c_start, pos + # collect existing slice + if (sliceobject).step is None: + c_step = 1 + else: + c_step = (sliceobject).step + if c_step == 0: + raise ValueError, "Invalid slice" + cdef list del_items = target[sliceobject] + + # collect new values + new_items = [] + tag = target.tag + for item in items: + if isinstance(item, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + target._doc, (<_Element>item)._c_node) + new_element.tag = tag + else: + new_element = cetree.makeElement( + tag, target._doc, None, None, None, None, None) + _setElementValue(new_element, item) + new_items.append(new_element) + + # sanity check - raise what a list would raise + if c_step != 1 and len(del_items) != len(new_items): + raise ValueError, \ + f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}" + + # replace existing items + pos = 0 + parent = target.getparent() + replace = parent.replace + while pos < len(new_items) and pos < len(del_items): + replace(del_items[pos], new_items[pos]) + pos += 1 + # remove leftover items + if pos < len(del_items): + remove = parent.remove + while pos < len(del_items): + remove(del_items[pos]) + pos += 1 + # append remaining new items + if pos < len(new_items): + # the sanity check above guarantees (step == 1) + if pos > 0: + item = new_items[pos-1] + else: + if (sliceobject).start > 0: + c_node = parent._c_node.children + else: + c_node = parent._c_node.last + c_node = _findFollowingSibling( + c_node, tree._getNs(target._c_node), target._c_node.name, + (sliceobject).start - 1) + if c_node is NULL: + while pos < len(new_items): + cetree.appendChildToElement(parent, new_items[pos]) + pos += 1 + return + item = cetree.elementFactory(parent._doc, c_node) + while pos < len(new_items): + add = item.addnext + item = new_items[pos] + add(item) + pos += 1 + +################################################################################ +# Data type support in subclasses + +cdef class ObjectifiedDataElement(ObjectifiedElement): + """This is the base class for all data type Elements. Subclasses should + override the 'pyval' property and possibly the __str__ method. + """ + @property + def pyval(self): + return textOf(self._c_node) + + def __str__(self): + return textOf(self._c_node) or '' + + def __repr__(self): + return textOf(self._c_node) or '' + + def _setText(self, s): + """For use in subclasses only. Don't use unless you know what you are + doing. + """ + cetree.setNodeText(self._c_node, s) + + +cdef class NumberElement(ObjectifiedDataElement): + cdef object _parse_value + + def _setValueParser(self, function): + """Set the function that parses the Python value from a string. + + Do not use this unless you know what you are doing. + """ + self._parse_value = function + + @property + def pyval(self): + return _parseNumber(self) + + def __int__(self): + return int(_parseNumber(self)) + + def __float__(self): + return float(_parseNumber(self)) + + def __complex__(self): + return complex(_parseNumber(self)) + + def __str__(self): + return unicode(_parseNumber(self)) + + def __repr__(self): + return repr(_parseNumber(self)) + + def __oct__(self): + return oct(_parseNumber(self)) + + def __hex__(self): + return hex(_parseNumber(self)) + + def __richcmp__(self, other, int op): + return _richcmpPyvals(self, other, op) + + def __hash__(self): + return hash(_parseNumber(self)) + + def __add__(self, other): + return _numericValueOf(self) + _numericValueOf(other) + + def __radd__(self, other): + return _numericValueOf(other) + _numericValueOf(self) + + def __sub__(self, other): + return _numericValueOf(self) - _numericValueOf(other) + + def __rsub__(self, other): + return _numericValueOf(other) - _numericValueOf(self) + + def __mul__(self, other): + return _numericValueOf(self) * _numericValueOf(other) + + def __rmul__(self, other): + return _numericValueOf(other) * _numericValueOf(self) + + def __div__(self, other): + return _numericValueOf(self) / _numericValueOf(other) + + def __rdiv__(self, other): + return _numericValueOf(other) / _numericValueOf(self) + + def __truediv__(self, other): + return _numericValueOf(self) / _numericValueOf(other) + + def __rtruediv__(self, other): + return _numericValueOf(other) / _numericValueOf(self) + + def __floordiv__(self, other): + return _numericValueOf(self) // _numericValueOf(other) + + def __rfloordiv__(self, other): + return _numericValueOf(other) // _numericValueOf(self) + + def __mod__(self, other): + return _numericValueOf(self) % _numericValueOf(other) + + def __rmod__(self, other): + return _numericValueOf(other) % _numericValueOf(self) + + def __divmod__(self, other): + return divmod(_numericValueOf(self), _numericValueOf(other)) + + def __rdivmod__(self, other): + return divmod(_numericValueOf(other), _numericValueOf(self)) + + def __pow__(self, other, modulo): + if modulo is None: + return _numericValueOf(self) ** _numericValueOf(other) + else: + return pow(_numericValueOf(self), _numericValueOf(other), modulo) + + def __rpow__(self, other, modulo): + if modulo is None: + return _numericValueOf(other) ** _numericValueOf(self) + else: + return pow(_numericValueOf(other), _numericValueOf(self), modulo) + + def __neg__(self): + return - _numericValueOf(self) + + def __pos__(self): + return + _numericValueOf(self) + + def __abs__(self): + return abs( _numericValueOf(self) ) + + def __bool__(self): + return bool(_numericValueOf(self)) + + def __invert__(self): + return ~ _numericValueOf(self) + + def __lshift__(self, other): + return _numericValueOf(self) << _numericValueOf(other) + + def __rlshift__(self, other): + return _numericValueOf(other) << _numericValueOf(self) + + def __rshift__(self, other): + return _numericValueOf(self) >> _numericValueOf(other) + + def __rrshift__(self, other): + return _numericValueOf(other) >> _numericValueOf(self) + + def __and__(self, other): + return _numericValueOf(self) & _numericValueOf(other) + + def __rand__(self, other): + return _numericValueOf(other) & _numericValueOf(self) + + def __or__(self, other): + return _numericValueOf(self) | _numericValueOf(other) + + def __ror__(self, other): + return _numericValueOf(other) | _numericValueOf(self) + + def __xor__(self, other): + return _numericValueOf(self) ^ _numericValueOf(other) + + def __rxor__(self, other): + return _numericValueOf(other) ^ _numericValueOf(self) + + +cdef class IntElement(NumberElement): + def _init(self): + self._parse_value = int + + def __index__(self): + return int(_parseNumber(self)) + + +cdef class FloatElement(NumberElement): + def _init(self): + self._parse_value = float + + +cdef class StringElement(ObjectifiedDataElement): + """String data class. + + Note that this class does *not* support the sequence protocol of strings: + len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported. + Instead, use the .text attribute to get a 'real' string. + """ + @property + def pyval(self): + return textOf(self._c_node) or '' + + def __repr__(self): + return repr(textOf(self._c_node) or '') + + def strlen(self): + text = textOf(self._c_node) + if text is None: + return 0 + else: + return len(text) + + def __bool__(self): + return bool(textOf(self._c_node)) + + def __richcmp__(self, other, int op): + return _richcmpPyvals(self, other, op) + + def __hash__(self): + return hash(textOf(self._c_node) or '') + + def __add__(self, other): + text = _strValueOf(self) + other = _strValueOf(other) + return text + other + + def __radd__(self, other): + text = _strValueOf(self) + other = _strValueOf(other) + return other + text + + def __mul__(self, other): + if isinstance(self, StringElement): + return (textOf((self)._c_node) or '') * _numericValueOf(other) + elif isinstance(other, StringElement): + return _numericValueOf(self) * (textOf((other)._c_node) or '') + else: + return NotImplemented + + def __rmul__(self, other): + return _numericValueOf(other) * (textOf((self)._c_node) or '') + + def __mod__(self, other): + return (_strValueOf(self) or '') % other + + def __int__(self): + return int(textOf(self._c_node)) + + def __float__(self): + return float(textOf(self._c_node)) + + def __complex__(self): + return complex(textOf(self._c_node)) + + +cdef class NoneElement(ObjectifiedDataElement): + def __str__(self): + return "None" + + def __repr__(self): + return "None" + + def __bool__(self): + return False + + def __richcmp__(self, other, int op): + if other is None or self is None: + return python.PyObject_RichCompare(None, None, op) + if isinstance(self, NoneElement): + return python.PyObject_RichCompare(None, other, op) + else: + return python.PyObject_RichCompare(self, None, op) + + def __hash__(self): + return hash(None) + + @property + def pyval(self): + return None + + +cdef class BoolElement(IntElement): + """Boolean type base on string values: 'true' or 'false'. + + Note that this inherits from IntElement to mimic the behaviour of + Python's bool type. + """ + def _init(self): + self._parse_value = _parseBool # wraps as Python callable + + def __bool__(self): + return _parseBool(textOf(self._c_node)) + + def __int__(self): + return 0 + _parseBool(textOf(self._c_node)) + + def __float__(self): + return 0.0 + _parseBool(textOf(self._c_node)) + + def __richcmp__(self, other, int op): + return _richcmpPyvals(self, other, op) + + def __hash__(self): + return hash(_parseBool(textOf(self._c_node))) + + def __str__(self): + return unicode(_parseBool(textOf(self._c_node))) + + def __repr__(self): + return repr(_parseBool(textOf(self._c_node))) + + @property + def pyval(self): + return _parseBool(textOf(self._c_node)) + + +cdef _checkBool(s): + cdef int value = -1 + if s is not None: + value = __parseBoolAsInt(s) + if value == -1: + raise ValueError + + +cdef bint _parseBool(s) except -1: + cdef int value + if s is None: + return False + value = __parseBoolAsInt(s) + if value == -1: + raise ValueError, f"Invalid boolean value: '{s}'" + return value + + +cdef inline int __parseBoolAsInt(text) except -2: + if text == 'false': + return 0 + elif text == 'true': + return 1 + elif text == '0': + return 0 + elif text == '1': + return 1 + return -1 + + +cdef object _parseNumber(NumberElement element): + return element._parse_value(textOf(element._c_node)) + + +cdef enum NumberParserState: + NPS_SPACE_PRE = 0 + NPS_SIGN = 1 + NPS_DIGITS = 2 + NPS_POINT_LEAD = 3 + NPS_POINT = 4 + NPS_FRACTION = 5 + NPS_EXP = 6 + NPS_EXP_SIGN = 7 + NPS_DIGITS_EXP = 8 + NPS_SPACE_TAIL = 9 + NPS_INF1 = 20 + NPS_INF2 = 21 + NPS_INF3 = 22 + NPS_NAN1 = 23 + NPS_NAN2 = 24 + NPS_NAN3 = 25 + NPS_ERROR = 99 + + +ctypedef fused bytes_unicode: + bytes + unicode + + +cdef _checkNumber(bytes_unicode s, bint allow_float): + cdef Py_UCS4 c + cdef NumberParserState state = NPS_SPACE_PRE + + for c in s: + if c in '0123456789': + if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP): + pass + elif state in (NPS_SPACE_PRE, NPS_SIGN): + state = NPS_DIGITS + elif state in (NPS_POINT_LEAD, NPS_POINT): + state = NPS_FRACTION + elif state in (NPS_EXP, NPS_EXP_SIGN): + state = NPS_DIGITS_EXP + else: + state = NPS_ERROR + else: + if c == '.': + if state in (NPS_SPACE_PRE, NPS_SIGN): + state = NPS_POINT_LEAD + elif state == NPS_DIGITS: + state = NPS_POINT + else: + state = NPS_ERROR + if not allow_float: + state = NPS_ERROR + elif c in '-+': + if state == NPS_SPACE_PRE: + state = NPS_SIGN + elif state == NPS_EXP: + state = NPS_EXP_SIGN + else: + state = NPS_ERROR + elif c == 'E': + if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION): + state = NPS_EXP + else: + state = NPS_ERROR + if not allow_float: + state = NPS_ERROR + # Allow INF and NaN. XMLSchema requires case, we don't, like Python. + elif c in 'iI': + state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR + elif c in 'fF': + state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR + elif c in 'aA': + state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR + elif c in 'nN': + # Python also allows [+-]NaN, so let's accept that. + if state in (NPS_SPACE_PRE, NPS_SIGN): + state = NPS_NAN1 if allow_float else NPS_ERROR + elif state == NPS_NAN2: + state = NPS_NAN3 + elif state == NPS_INF1: + state = NPS_INF2 + else: + state = NPS_ERROR + # Allow spaces around text values. + else: + if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20': + if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL): + pass + elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3): + state = NPS_SPACE_TAIL + else: + state = NPS_ERROR + else: + state = NPS_ERROR + + if state == NPS_ERROR: + break + + if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL): + raise ValueError + + +cdef _checkInt(s): + return _checkNumber(s, allow_float=False) + + +cdef _checkFloat(s): + return _checkNumber(s, allow_float=True) + + +cdef object _strValueOf(obj): + if python._isString(obj): + return obj + if isinstance(obj, _Element): + return textOf((<_Element>obj)._c_node) or '' + if obj is None: + return '' + return unicode(obj) + + +cdef object _numericValueOf(obj): + if isinstance(obj, NumberElement): + return _parseNumber(obj) + try: + # not always numeric, but Python will raise the right exception + return obj.pyval + except AttributeError: + pass + return obj + + +cdef _richcmpPyvals(left, right, int op): + left = getattr(left, 'pyval', left) + right = getattr(right, 'pyval', right) + return python.PyObject_RichCompare(left, right, op) + + +################################################################################ +# Python type registry + +cdef class PyType: + """PyType(self, name, type_check, type_class, stringify=None) + User defined type. + + Named type that contains a type check function, a type class that + inherits from ObjectifiedDataElement and an optional "stringification" + function. The type check must take a string as argument and raise + ValueError or TypeError if it cannot handle the string value. It may be + None in which case it is not considered for type guessing. For registered + named types, the 'stringify' function (or unicode() if None) is used to + convert a Python object with type name 'name' to the string representation + stored in the XML tree. + + Example:: + + PyType('int', int, MyIntClass).register() + + Note that the order in which types are registered matters. The first + matching type will be used. + """ + cdef readonly object name + cdef readonly object type_check + cdef readonly object stringify + cdef object _type + cdef list _schema_types + def __init__(self, name, type_check, type_class, stringify=None): + if isinstance(name, bytes): + name = (name).decode('ascii') + elif not isinstance(name, unicode): + raise TypeError, "Type name must be a string" + if type_check is not None and not callable(type_check): + raise TypeError, "Type check function must be callable (or None)" + if name != TREE_PYTYPE_NAME and \ + not issubclass(type_class, ObjectifiedDataElement): + raise TypeError, \ + "Data classes must inherit from ObjectifiedDataElement" + self.name = name + self._type = type_class + self.type_check = type_check + if stringify is None: + stringify = unicode + self.stringify = stringify + self._schema_types = [] + + def __repr__(self): + return "PyType(%s, %s)" % (self.name, self._type.__name__) + + def register(self, before=None, after=None): + """register(self, before=None, after=None) + + Register the type. + + The additional keyword arguments 'before' and 'after' accept a + sequence of type names that must appear before/after the new type in + the type list. If any of them is not currently known, it is simply + ignored. Raises ValueError if the dependencies cannot be fulfilled. + """ + if self.name == TREE_PYTYPE_NAME: + raise ValueError, "Cannot register tree type" + if self.type_check is not None: + for item in _TYPE_CHECKS: + if item[0] is self.type_check: + _TYPE_CHECKS.remove(item) + break + entry = (self.type_check, self) + first_pos = 0 + last_pos = -1 + if before or after: + if before is None: + before = () + elif after is None: + after = () + for i, (check, pytype) in enumerate(_TYPE_CHECKS): + if last_pos == -1 and pytype.name in before: + last_pos = i + if pytype.name in after: + first_pos = i+1 + if last_pos == -1: + _TYPE_CHECKS.append(entry) + elif first_pos > last_pos: + raise ValueError, "inconsistent before/after dependencies" + else: + _TYPE_CHECKS.insert(last_pos, entry) + + _PYTYPE_DICT[self.name] = self + for xs_type in self._schema_types: + _SCHEMA_TYPE_DICT[xs_type] = self + + def unregister(self): + "unregister(self)" + if _PYTYPE_DICT.get(self.name) is self: + del _PYTYPE_DICT[self.name] + for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()): + if pytype is self: + del _SCHEMA_TYPE_DICT[xs_type] + if self.type_check is None: + return + try: + _TYPE_CHECKS.remove( (self.type_check, self) ) + except ValueError: + pass + + property xmlSchemaTypes: + """The list of XML Schema datatypes this Python type maps to. + + Note that this must be set before registering the type! + """ + def __get__(self): + return self._schema_types + def __set__(self, types): + self._schema_types = list(map(unicode, types)) + + +cdef dict _PYTYPE_DICT = {} +cdef dict _SCHEMA_TYPE_DICT = {} +cdef list _TYPE_CHECKS = [] + +cdef unicode _xml_bool(value): + return "true" if value else "false" + +cdef unicode _xml_float(value): + if _float_is_inf(value): + if value > 0: + return "INF" + return "-INF" + if _float_is_nan(value): + return "NaN" + return unicode(repr(value)) + +cdef _pytypename(obj): + return "str" if python._isString(obj) else _typename(obj) + +def pytypename(obj): + """pytypename(obj) + + Find the name of the corresponding PyType for a Python object. + """ + return _pytypename(obj) + +cdef _registerPyTypes(): + pytype = PyType('int', _checkInt, IntElement) # wraps functions for Python + pytype.xmlSchemaTypes = ("integer", "int", "short", "byte", "unsignedShort", + "unsignedByte", "nonPositiveInteger", + "negativeInteger", "long", "nonNegativeInteger", + "unsignedLong", "unsignedInt", "positiveInteger",) + pytype.register() + + # 'long' type just for backwards compatibility + pytype = PyType('long', None, IntElement) + pytype.register() + + pytype = PyType('float', _checkFloat, FloatElement, _xml_float) # wraps functions for Python + pytype.xmlSchemaTypes = ("double", "float") + pytype.register() + + pytype = PyType('bool', _checkBool, BoolElement, _xml_bool) # wraps functions for Python + pytype.xmlSchemaTypes = ("boolean",) + pytype.register() + + pytype = PyType('str', None, StringElement) + pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language", + "Name", "NCName", "ID", "IDREF", "ENTITY", + "NMTOKEN", ) + pytype.register() + + # since lxml 2.0 + pytype = PyType('NoneType', None, NoneElement) + pytype.register() + + # backwards compatibility + pytype = PyType('none', None, NoneElement) + pytype.register() + +# non-registered PyType for inner tree elements +cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement) + +_registerPyTypes() + +def getRegisteredTypes(): + """getRegisteredTypes() + + Returns a list of the currently registered PyType objects. + + To add a new type, retrieve this list and call unregister() for all + entries. Then add the new type at a suitable position (possibly replacing + an existing one) and call register() for all entries. + + This is necessary if the new type interferes with the type check functions + of existing ones (normally only int/float/bool) and must the tried before + other types. To add a type that is not yet parsable by the current type + check functions, you can simply register() it, which will append it to the + end of the type list. + """ + cdef list types = [] + cdef set known = set() + for check, pytype in _TYPE_CHECKS: + name = pytype.name + if name not in known: + known.add(name) + types.append(pytype) + for pytype in _PYTYPE_DICT.values(): + name = pytype.name + if name not in known: + known.add(name) + types.append(pytype) + return types + +cdef PyType _guessPyType(value, PyType defaulttype): + if value is None: + return None + for type_check, tested_pytype in _TYPE_CHECKS: + try: + type_check(value) + return tested_pytype + except IGNORABLE_ERRORS: + # could not be parsed as the specified type => ignore + pass + return defaulttype + +cdef object _guessElementClass(tree.xmlNode* c_node): + value = textOf(c_node) + if value is None: + return None + if value == '': + return StringElement + + for type_check, pytype in _TYPE_CHECKS: + try: + type_check(value) + return (pytype)._type + except IGNORABLE_ERRORS: + pass + return None + +################################################################################ +# adapted ElementMaker supports registered PyTypes + +@cython.final +@cython.internal +cdef class _ObjectifyElementMakerCaller: + cdef object _tag + cdef object _nsmap + cdef object _element_factory + cdef bint _annotate + + def __call__(self, *children, **attrib): + "__call__(self, *children, **attrib)" + cdef _ObjectifyElementMakerCaller elementMaker + cdef _Element element + cdef _Element childElement + cdef bint has_children + cdef bint has_string_value + if self._element_factory is None: + element = _makeElement(self._tag, None, attrib, self._nsmap) + else: + element = self._element_factory(self._tag, attrib, self._nsmap) + + pytype_name = None + has_children = False + has_string_value = False + for child in children: + if child is None: + if len(children) == 1: + cetree.setAttributeValue( + element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") + elif python._isString(child): + _add_text(element, child) + has_string_value = True + elif isinstance(child, _Element): + cetree.appendChildToElement(element, <_Element>child) + has_children = True + elif isinstance(child, _ObjectifyElementMakerCaller): + elementMaker = <_ObjectifyElementMakerCaller>child + if elementMaker._element_factory is None: + cetree.makeSubElement(element, elementMaker._tag, + None, None, None, None) + else: + childElement = elementMaker._element_factory( + elementMaker._tag) + cetree.appendChildToElement(element, childElement) + has_children = True + elif isinstance(child, dict): + for name, value in child.items(): + # keyword arguments in attrib take precedence + if name in attrib: + continue + pytype = _PYTYPE_DICT.get(_typename(value)) + if pytype is not None: + value = (pytype).stringify(value) + elif not python._isString(value): + value = unicode(value) + cetree.setAttributeValue(element, name, value) + else: + if pytype_name is not None: + # concatenation always makes the result a string + has_string_value = True + pytype_name = _typename(child) + pytype = _PYTYPE_DICT.get(_typename(child)) + if pytype is not None: + _add_text(element, (pytype).stringify(child)) + else: + has_string_value = True + child = unicode(child) + _add_text(element, child) + + if self._annotate and not has_children: + if has_string_value: + cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str") + elif pytype_name is not None: + cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) + + return element + +cdef _add_text(_Element elem, text): + # add text to the tree in construction, either as element text or + # tail text, depending on the current tree state + cdef tree.xmlNode* c_child + c_child = cetree.findChildBackwards(elem._c_node, 0) + if c_child is not NULL: + old = cetree.tailOf(c_child) + if old is not None: + text = old + text + cetree.setTailText(c_child, text) + else: + old = cetree.textOf(elem._c_node) + if old is not None: + text = old + text + cetree.setNodeText(elem._c_node, text) + +cdef class ElementMaker: + """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None) + + An ElementMaker that can be used for constructing trees. + + Example:: + + >>> M = ElementMaker(annotate=False) + >>> attributes = {'class': 'par'} + >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) ) + + >>> from lxml.etree import tostring + >>> print(tostring(html, method='html').decode('ascii')) +

hello
objectify

+ + To create tags that are not valid Python identifiers, call the factory + directly and pass the tag name as first argument:: + + >>> root = M('tricky-tag', 'some text') + >>> print(root.tag) + tricky-tag + >>> print(root.text) + some text + + Note that this module has a predefined ElementMaker instance called ``E``. + """ + cdef object _makeelement + cdef object _namespace + cdef object _nsmap + cdef bint _annotate + cdef dict _cache + def __init__(self, *, namespace=None, nsmap=None, annotate=True, + makeelement=None): + if nsmap is None: + nsmap = _DEFAULT_NSMAP if annotate else {} + self._nsmap = nsmap + self._namespace = None if namespace is None else "{%s}" % namespace + self._annotate = annotate + if makeelement is not None: + if not callable(makeelement): + raise TypeError( + f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}") + self._makeelement = makeelement + else: + self._makeelement = None + self._cache = {} + + @cython.final + cdef _build_element_maker(self, tag, bint caching): + cdef _ObjectifyElementMakerCaller element_maker + element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller) + if self._namespace is not None and tag[0] != "{": + element_maker._tag = self._namespace + tag + else: + element_maker._tag = tag + element_maker._nsmap = self._nsmap + element_maker._annotate = self._annotate + element_maker._element_factory = self._makeelement + if caching: + if len(self._cache) > 200: + self._cache.clear() + self._cache[tag] = element_maker + return element_maker + + def __getattr__(self, tag): + element_maker = self._cache.get(tag) + if element_maker is None: + return self._build_element_maker(tag, caching=True) + return element_maker + + def __call__(self, tag, *args, **kwargs): + element_maker = self._cache.get(tag) + if element_maker is None: + element_maker = self._build_element_maker( + tag, caching=not is_special_method(tag)) + return element_maker(*args, **kwargs) + +################################################################################ +# Recursive element dumping + +cdef bint __RECURSIVE_STR = 0 # default: off + +def enable_recursive_str(on=True): + """enable_recursive_str(on=True) + + Enable a recursively generated tree representation for str(element), + based on objectify.dump(element). + """ + global __RECURSIVE_STR + __RECURSIVE_STR = on + +def dump(_Element element not None): + """dump(_Element element not None) + + Return a recursively generated string representation of an element. + """ + return _dump(element, 0) + +cdef object _dump(_Element element, int indent): + indentstr = " " * indent + if isinstance(element, ObjectifiedDataElement): + value = repr(element) + else: + value = textOf(element._c_node) + if value is not None: + if not value.strip(): + value = None + else: + value = repr(value) + result = f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n" + xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS + pytype_ns = "{%s}" % PYTYPE_NAMESPACE + for name, value in sorted(cetree.iterattributes(element, 3)): + if '{' in name: + if name == PYTYPE_ATTRIBUTE: + if value == TREE_PYTYPE_NAME: + continue + else: + name = name.replace(pytype_ns, 'py:') + name = name.replace(xsi_ns, 'xsi:') + result += f"{indentstr} * {name} = {value!r}\n" + + indent += 1 + for child in element.iterchildren(): + result += _dump(child, indent) + if indent == 1: + return result[:-1] # strip last '\n' + else: + return result + + +################################################################################ +# Pickle support for objectified ElementTree + +def __unpickleElementTree(data): + return etree.ElementTree(fromstring(data)) + +cdef _setupPickle(elementTreeReduceFunction): + import copyreg + copyreg.pickle(etree._ElementTree, + elementTreeReduceFunction, __unpickleElementTree) + +def pickleReduceElementTree(obj): + return __unpickleElementTree, (etree.tostring(obj),) + +_setupPickle(pickleReduceElementTree) +del pickleReduceElementTree + +################################################################################ +# Element class lookup + +cdef class ObjectifyElementClassLookup(ElementClassLookup): + """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None) + Element class lookup method that uses the objectify classes. + """ + cdef object empty_data_class + cdef object tree_class + def __init__(self, tree_class=None, empty_data_class=None): + """Lookup mechanism for objectify. + + The default Element classes can be replaced by passing subclasses of + ObjectifiedElement and ObjectifiedDataElement as keyword arguments. + 'tree_class' defines inner tree classes (defaults to + ObjectifiedElement), 'empty_data_class' defines the default class for + empty data elements (defaults to StringElement). + """ + self._lookup_function = _lookupElementClass + if tree_class is None: + tree_class = ObjectifiedElement + self.tree_class = tree_class + if empty_data_class is None: + empty_data_class = StringElement + self.empty_data_class = empty_data_class + +cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): + cdef ObjectifyElementClassLookup lookup + lookup = state + # if element has children => no data class + if cetree.hasChild(c_node): + return lookup.tree_class + + # if element is defined as xsi:nil, return NoneElement class + if "true" == cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "nil"): + return NoneElement + + # check for Python type hint + value = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + if value is not None: + if value == TREE_PYTYPE_NAME: + return lookup.tree_class + py_type = _PYTYPE_DICT.get(value) + if py_type is not None: + return py_type._type + # unknown 'pyval' => try to figure it out ourself, just go on + + # check for XML Schema type hint + value = cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + + if value is not None: + schema_type = _SCHEMA_TYPE_DICT.get(value) + if schema_type is None and ':' in value: + prefix, value = value.split(':', 1) + schema_type = _SCHEMA_TYPE_DICT.get(value) + if schema_type is not None: + return schema_type._type + + # otherwise determine class based on text content type + el_class = _guessElementClass(c_node) + if el_class is not None: + return el_class + + # if element is a root node => default to tree node + if c_node.parent is NULL or not tree._isElement(c_node.parent): + return lookup.tree_class + + return lookup.empty_data_class + + +################################################################################ +# Type annotations + +cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype): + if pytype is None: + return None + value = textOf(c_node) + try: + pytype.type_check(value) + return pytype + except IGNORABLE_ERRORS: + # could not be parsed as the specified type => ignore + pass + return None + +def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, + empty_pytype=None): + """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None) + + Recursively annotates the elements of an XML tree with 'pytype' + attributes. + + If the 'ignore_old' keyword argument is True (the default), current 'pytype' + attributes will be ignored and replaced. Otherwise, they will be checked + and only replaced if they no longer fit the current text value. + + Setting the keyword argument ``ignore_xsi`` to True makes the function + additionally ignore existing ``xsi:type`` annotations. The default is to + use them as a type hint. + + The default annotation of empty elements can be set with the + ``empty_pytype`` keyword argument. The default is not to annotate empty + elements. Pass 'str', for example, to make string values the default. + """ + cdef _Element element + element = cetree.rootNodeOrRaise(element_or_tree) + _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) + +def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, + empty_type=None): + """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None) + + Recursively annotates the elements of an XML tree with 'xsi:type' + attributes. + + If the 'ignore_old' keyword argument is True (the default), current + 'xsi:type' attributes will be ignored and replaced. Otherwise, they will be + checked and only replaced if they no longer fit the current text value. + + Note that the mapping from Python types to XSI types is usually ambiguous. + Currently, only the first XSI type name in the corresponding PyType + definition will be used for annotation. Thus, you should consider naming + the widest type first if you define additional types. + + Setting the keyword argument ``ignore_pytype`` to True makes the function + additionally ignore existing ``pytype`` annotations. The default is to + use them as a type hint. + + The default annotation of empty elements can be set with the + ``empty_type`` keyword argument. The default is not to annotate empty + elements. Pass 'string', for example, to make string values the default. + """ + cdef _Element element + element = cetree.rootNodeOrRaise(element_or_tree) + _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) + +def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, + empty_pytype=None, empty_type=None, annotate_xsi=0, + annotate_pytype=1): + """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1) + + Recursively annotates the elements of an XML tree with 'xsi:type' + and/or 'py:pytype' attributes. + + If the 'ignore_old' keyword argument is True (the default), current + 'py:pytype' attributes will be ignored for the type annotation. Set to False + if you want reuse existing 'py:pytype' information (iff appropriate for the + element text value). + + If the 'ignore_xsi' keyword argument is False (the default), existing + 'xsi:type' attributes will be used for the type annotation, if they fit the + element text values. + + Note that the mapping from Python types to XSI types is usually ambiguous. + Currently, only the first XSI type name in the corresponding PyType + definition will be used for annotation. Thus, you should consider naming + the widest type first if you define additional types. + + The default 'py:pytype' annotation of empty elements can be set with the + ``empty_pytype`` keyword argument. Pass 'str', for example, to make + string values the default. + + The default 'xsi:type' annotation of empty elements can be set with the + ``empty_type`` keyword argument. The default is not to annotate empty + elements. Pass 'string', for example, to make string values the default. + + The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype' + (default: 1) control which kind(s) of annotation to use. + """ + cdef _Element element + element = cetree.rootNodeOrRaise(element_or_tree) + _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi, + ignore_old, empty_type, empty_pytype) + + +cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype, + bint ignore_xsi, bint ignore_pytype, + empty_type_name, empty_pytype_name): + cdef _Document doc + cdef tree.xmlNode* c_node + cdef PyType empty_pytype, StrType, NoneType + + if not annotate_xsi and not annotate_pytype: + return + + if empty_type_name is not None: + if isinstance(empty_type_name, bytes): + empty_type_name = (empty_type_name).decode("ascii") + empty_pytype = _SCHEMA_TYPE_DICT.get(empty_type_name) + elif empty_pytype_name is not None: + if isinstance(empty_pytype_name, bytes): + empty_pytype_name = (empty_pytype_name).decode("ascii") + empty_pytype = _PYTYPE_DICT.get(empty_pytype_name) + else: + empty_pytype = None + + StrType = _PYTYPE_DICT.get('str') + NoneType = _PYTYPE_DICT.get('NoneType') + + doc = element._doc + c_node = element._c_node + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + _annotate_element(c_node, doc, annotate_xsi, annotate_pytype, + ignore_xsi, ignore_pytype, + empty_type_name, empty_pytype, StrType, NoneType) + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + +cdef int _annotate_element(tree.xmlNode* c_node, _Document doc, + bint annotate_xsi, bint annotate_pytype, + bint ignore_xsi, bint ignore_pytype, + empty_type_name, PyType empty_pytype, + PyType StrType, PyType NoneType) except -1: + cdef tree.xmlNs* c_ns + cdef PyType pytype = None + typename = None + istree = 0 + + # if element is defined as xsi:nil, represent it as None + if cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "nil") == "true": + pytype = NoneType + + if pytype is None and not ignore_xsi: + # check that old xsi type value is valid + typename = cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + if typename is not None: + pytype = _SCHEMA_TYPE_DICT.get(typename) + if pytype is None and ':' in typename: + prefix, typename = typename.split(':', 1) + pytype = _SCHEMA_TYPE_DICT.get(typename) + if pytype is not None and pytype is not StrType: + # StrType does not have a typecheck but is the default + # anyway, so just accept it if given as type + # information + pytype = _check_type(c_node, pytype) + if pytype is None: + typename = None + + if pytype is None and not ignore_pytype: + # check that old pytype value is valid + old_pytypename = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + if old_pytypename is not None: + if old_pytypename == TREE_PYTYPE_NAME: + if not cetree.hasChild(c_node): + # only case where we should keep it, + # everything else is clear enough + pytype = TREE_PYTYPE + else: + if old_pytypename == 'none': + # transition from lxml 1.x + old_pytypename = "NoneType" + pytype = _PYTYPE_DICT.get(old_pytypename) + if pytype is not None and pytype is not StrType: + # StrType does not have a typecheck but is the + # default anyway, so just accept it if given as + # type information + pytype = _check_type(c_node, pytype) + + if pytype is None: + # try to guess type + if not cetree.hasChild(c_node): + # element has no children => data class + pytype = _guessPyType(textOf(c_node), StrType) + else: + istree = 1 + + if pytype is None: + # use default type for empty elements + if cetree.hasText(c_node): + pytype = StrType + else: + pytype = empty_pytype + if typename is None: + typename = empty_type_name + + if pytype is not None: + if typename is None: + if not istree: + if pytype._schema_types: + # pytype->xsi:type is a 1:n mapping + # simply take the first + typename = pytype._schema_types[0] + elif typename not in pytype._schema_types: + typename = pytype._schema_types[0] + + if annotate_xsi: + if typename is None or istree: + cetree.delAttributeFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, "type") + else: + # update or create attribute + typename_utf8 = cetree.utf8(typename) + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _XML_SCHEMA_NS, 'xsd') + if c_ns is not NULL: + if b':' in typename_utf8: + prefix, name = typename_utf8.split(b':', 1) + if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0': + typename_utf8 = name + elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0: + typename_utf8 = (c_ns.prefix) + b':' + name + elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0': + typename_utf8 = (c_ns.prefix) + b':' + typename_utf8 + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _XML_SCHEMA_INSTANCE_NS, 'xsi') + tree.xmlSetNsProp(c_node, c_ns, "type", _xcstr(typename_utf8)) + + if annotate_pytype: + if pytype is None: + # delete attribute if it exists + cetree.delAttributeFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + else: + # update or create attribute + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _PYTYPE_NAMESPACE, 'py') + pytype_name = cetree.utf8(pytype.name) + tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME, + _xcstr(pytype_name)) + if pytype is NoneType: + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _XML_SCHEMA_INSTANCE_NS, 'xsi') + tree.xmlSetNsProp(c_node, c_ns, "nil", "true") + + return 0 + +cdef object _strip_attributes = etree.strip_attributes +cdef object _cleanup_namespaces = etree.cleanup_namespaces + +def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True, + bint xsi_nil=False, bint cleanup_namespaces=False): + """deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False) + + Recursively de-annotate the elements of an XML tree by removing 'py:pytype' + and/or 'xsi:type' attributes and/or 'xsi:nil' attributes. + + If the 'pytype' keyword argument is True (the default), 'py:pytype' + attributes will be removed. If the 'xsi' keyword argument is True (the + default), 'xsi:type' attributes will be removed. + If the 'xsi_nil' keyword argument is True (default: False), 'xsi:nil' + attributes will be removed. + + Note that this does not touch the namespace declarations by + default. If you want to remove unused namespace declarations from + the tree, pass the option ``cleanup_namespaces=True``. + """ + cdef list attribute_names = [] + + if pytype: + attribute_names.append(PYTYPE_ATTRIBUTE) + if xsi: + attribute_names.append(XML_SCHEMA_INSTANCE_TYPE_ATTR) + if xsi_nil: + attribute_names.append(XML_SCHEMA_INSTANCE_NIL_ATTR) + + _strip_attributes(element_or_tree, *attribute_names) + if cleanup_namespaces: + _cleanup_namespaces(element_or_tree) + +################################################################################ +# Module level parser setup + +cdef object __DEFAULT_PARSER +__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True) +__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() ) + +cdef object objectify_parser +objectify_parser = __DEFAULT_PARSER + +def set_default_parser(new_parser = None): + """set_default_parser(new_parser = None) + + Replace the default parser used by objectify's Element() and + fromstring() functions. + + The new parser must be an etree.XMLParser. + + Call without arguments to reset to the original parser. + """ + global objectify_parser + if new_parser is None: + objectify_parser = __DEFAULT_PARSER + elif isinstance(new_parser, etree.XMLParser): + objectify_parser = new_parser + else: + raise TypeError, "parser must inherit from lxml.etree.XMLParser" + +def makeparser(**kw): + """makeparser(remove_blank_text=True, **kw) + + Create a new XML parser for objectify trees. + + You can pass all keyword arguments that are supported by + ``etree.XMLParser()``. Note that this parser defaults to removing + blank text. You can disable this by passing the + ``remove_blank_text`` boolean keyword option yourself. + """ + if 'remove_blank_text' not in kw: + kw['remove_blank_text'] = True + parser = etree.XMLParser(**kw) + parser.set_element_class_lookup( ObjectifyElementClassLookup() ) + return parser + +cdef _Element _makeElement(tag, text, attrib, nsmap): + return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap) + +################################################################################ +# Module level factory functions + +cdef object _fromstring +_fromstring = etree.fromstring + +SubElement = etree.SubElement + +def fromstring(xml, parser=None, *, base_url=None): + """fromstring(xml, parser=None, base_url=None) + + Objectify specific version of the lxml.etree fromstring() function + that uses the objectify parser. + + You can pass a different parser as second argument. + + The ``base_url`` keyword argument allows to set the original base URL of + the document to support relative Paths when looking up external entities + (DTD, XInclude, ...). + """ + if parser is None: + parser = objectify_parser + return _fromstring(xml, parser, base_url=base_url) + +def XML(xml, parser=None, *, base_url=None): + """XML(xml, parser=None, base_url=None) + + Objectify specific version of the lxml.etree XML() literal factory + that uses the objectify parser. + + You can pass a different parser as second argument. + + The ``base_url`` keyword argument allows to set the original base URL of + the document to support relative Paths when looking up external entities + (DTD, XInclude, ...). + """ + if parser is None: + parser = objectify_parser + return _fromstring(xml, parser, base_url=base_url) + +cdef object _parse +_parse = etree.parse + +def parse(f, parser=None, *, base_url=None): + """parse(f, parser=None, base_url=None) + + Parse a file or file-like object with the objectify parser. + + You can pass a different parser as second argument. + + The ``base_url`` keyword allows setting a URL for the document + when parsing from a file-like object. This is needed when looking + up external entities (DTD, XInclude, ...) with relative paths. + """ + if parser is None: + parser = objectify_parser + return _parse(f, parser, base_url=base_url) + +cdef dict _DEFAULT_NSMAP = { + "py" : PYTYPE_NAMESPACE, + "xsi" : XML_SCHEMA_INSTANCE_NS, + "xsd" : XML_SCHEMA_NS +} + +E = ElementMaker() + +def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): + """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes) + + Objectify specific version of the lxml.etree Element() factory that + always creates a structural (tree) element. + + NOTE: requires parser based element class lookup activated in lxml.etree! + """ + if attrib is not None: + if _attributes: + attrib = dict(attrib) + attrib.update(_attributes) + _attributes = attrib + if _pytype is None: + _pytype = TREE_PYTYPE_NAME + if nsmap is None: + nsmap = _DEFAULT_NSMAP + _attributes[PYTYPE_ATTRIBUTE] = _pytype + return _makeElement(_tag, None, _attributes, nsmap) + +def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, + **_attributes): + """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes) + + Create a new element from a Python value and XML attributes taken from + keyword arguments or a dictionary passed as second argument. + + Automatically adds a 'pytype' attribute for the Python type of the value, + if the type can be identified. If '_pytype' or '_xsi' are among the + keyword arguments, they will be used instead. + + If the _value argument is an ObjectifiedDataElement instance, its py:pytype, + xsi:type and other attributes and nsmap are reused unless they are redefined + in attrib and/or keyword arguments. + """ + if nsmap is None: + nsmap = _DEFAULT_NSMAP + if attrib is not None and attrib: + if _attributes: + attrib = dict(attrib) + attrib.update(_attributes) + _attributes = attrib + if isinstance(_value, ObjectifiedElement): + if _pytype is None: + if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP: + # special case: no change! + return _value.__copy__() + if isinstance(_value, ObjectifiedDataElement): + # reuse existing nsmap unless redefined in nsmap parameter + temp = _value.nsmap + if temp is not None and temp: + temp = dict(temp) + temp.update(nsmap) + nsmap = temp + # reuse existing attributes unless redefined in attrib/_attributes + temp = _value.attrib + if temp is not None and temp: + temp = dict(temp) + temp.update(_attributes) + _attributes = temp + # reuse existing xsi:type or py:pytype attributes, unless provided as + # arguments + if _xsi is None and _pytype is None: + _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + _pytype = _attributes.get(PYTYPE_ATTRIBUTE) + + if _xsi is not None: + if ':' in _xsi: + prefix, name = _xsi.split(':', 1) + ns = nsmap.get(prefix) + if ns != XML_SCHEMA_NS: + raise ValueError, "XSD types require the XSD namespace" + elif nsmap is _DEFAULT_NSMAP: + name = _xsi + _xsi = 'xsd:' + _xsi + else: + name = _xsi + for prefix, ns in nsmap.items(): + if ns == XML_SCHEMA_NS: + if prefix is not None and prefix: + _xsi = prefix + ':' + _xsi + break + else: + raise ValueError, "XSD types require the XSD namespace" + _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi + if _pytype is None: + # allow using unregistered or even wrong xsi:type names + py_type = _SCHEMA_TYPE_DICT.get(_xsi) + if py_type is None: + py_type = _SCHEMA_TYPE_DICT.get(name) + if py_type is not None: + _pytype = py_type.name + + if _pytype is None: + _pytype = _pytypename(_value) + + if _value is None and _pytype != "str": + _pytype = _pytype or "NoneType" + strval = None + elif python._isString(_value): + strval = _value + elif isinstance(_value, bool): + if _value: + strval = "true" + else: + strval = "false" + else: + py_type = _PYTYPE_DICT.get(_pytype) + stringify = unicode if py_type is None else py_type.stringify + strval = stringify(_value) + + if _pytype is not None: + if _pytype == "NoneType" or _pytype == "none": + strval = None + _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true" + else: + # check if type information from arguments is valid + py_type = _PYTYPE_DICT.get(_pytype) + if py_type is not None: + if py_type.type_check is not None: + py_type.type_check(strval) + _attributes[PYTYPE_ATTRIBUTE] = _pytype + + return _makeElement("value", strval, _attributes, nsmap) + + +################################################################################ +# ObjectPath + +include "objectpath.pxi" -- cgit v1.2.3