aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/lxml/extensions.pxi
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/extensions.pxi')
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/extensions.pxi833
1 files changed, 833 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/extensions.pxi b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi
new file mode 100644
index 00000000..2a2c94ec
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi
@@ -0,0 +1,833 @@
+# support for extension functions in XPath and XSLT
+
+cdef class XPathError(LxmlError):  # root of the XPath exception hierarchy declared in this file
+ """Base class of all XPath errors.
+ """
+
+cdef class XPathEvalError(XPathError):  # parent class of XPathFunctionError and XPathResultError
+ """Error during XPath evaluation.
+ """
+
+cdef class XPathFunctionError(XPathEvalError):  # stored by _xpath_function_call() when no cached function matches
+ """Internal error looking up an XPath extension function.
+ """
+
+cdef class XPathResultError(XPathEvalError):  # raised by _wrapXPathObject()/_unwrapXPathObject() for unsupported values
+ """Error handling an XPath result.
+ """
+
+
+# forward declarations
+
+ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf)  # callback type taken by the (un)register*Functions() methods
+cdef class _ExsltRegExp  # defined further down; instantiated in _BaseContext.__init__
+
+################################################################################
+# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
+
+@cython.internal
+cdef class _BaseContext:  # shared state for XPath/XSLT evaluation contexts
+ cdef xpath.xmlXPathContext* _xpathCtxt  # libxml2 context; NULL until _set_xpath_context() is called
+ cdef _Document _doc  # document of the current evaluation, set by _register_context()
+ cdef dict _extensions  # {(ns_utf, name_utf): function} local extension functions, or None
+ cdef list _namespaces  # [(prefix_utf, ns_uri_utf)] local prefix mappings, or None
+ cdef list _global_namespaces  # prefixes appended by registerNamespace()/registerGlobalNamespaces()
+ cdef dict _utf_refs  # cache used by _to_utf(): original string -> UTF-8 bytes
+ cdef dict _function_cache  # {ns_utf: {name_utf: function}}, filled by register*Functions()
+ cdef dict _eval_context_dict  # lazily created dict behind the eval_context property
+ cdef bint _build_smart_strings  # whether string results are wrapped in _ElementUnicodeResult
+ # for exception handling and temporary reference keeping:
+ cdef _TempStore _temp_refs
+ cdef set _temp_documents
+ cdef _ExceptionContext _exc
+ cdef _ErrorLog _error_log
+
+ def __cinit__(self):  # start without a libxml2 context attached
+ self._xpathCtxt = NULL
+
+ def __init__(self, namespaces, extensions, error_log, enable_regexp,
+ build_smart_strings):  # normalise namespaces/extensions to UTF-8 and reset per-context state
+ cdef _ExsltRegExp _regexp
+ cdef dict new_extensions
+ cdef list ns
+ self._utf_refs = {}
+ self._global_namespaces = []
+ self._function_cache = {}
+ self._eval_context_dict = None
+ self._error_log = error_log
+
+ if extensions is not None:
+ # convert extensions to UTF-8
+ if isinstance(extensions, dict):
+ extensions = (extensions,)  # accept a single mapping as well as a sequence of mappings
+ # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function}
+ new_extensions = {}
+ for extension in extensions:
+ for (ns_uri, name), function in extension.items():
+ if name is None:
+ raise ValueError, "extensions must have non empty names"
+ ns_utf = self._to_utf(ns_uri)
+ name_utf = self._to_utf(name)
+ new_extensions[(ns_utf, name_utf)] = function  # a later mapping overwrites an earlier one for the same key
+ extensions = new_extensions or None  # an empty result is stored as None
+
+ if namespaces is not None:
+ if isinstance(namespaces, dict):
+ namespaces = namespaces.items()  # accept a mapping as well as an iterable of (prefix, uri) pairs
+ if namespaces:
+ ns = []
+ for prefix, ns_uri in namespaces:
+ if prefix is None or not prefix:
+ raise TypeError, \
+ "empty namespace prefix is not supported in XPath"
+ if ns_uri is None or not ns_uri:
+ raise TypeError, \
+ "setting default namespace is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ ns.append( (prefix_utf, ns_uri_utf) )
+ namespaces = ns
+ else:
+ namespaces = None
+
+ self._doc = None
+ self._exc = _ExceptionContext()
+ self._extensions = extensions
+ self._namespaces = namespaces
+ self._temp_refs = _TempStore()
+ self._temp_documents = set()
+ self._build_smart_strings = build_smart_strings
+
+ if enable_regexp:
+ _regexp = _ExsltRegExp()
+ _regexp._register_in_context(self)  # adds the EXSLT test/match/replace functions to self._extensions
+
+ cdef _BaseContext _copy(self):  # new instance of the same class with copied namespaces and extensions
+ cdef _BaseContext context
+ if self._namespaces is not None:
+ namespaces = self._namespaces[:]
+ else:
+ namespaces = None
+ context = self.__class__(namespaces, None, self._error_log, False,
+ self._build_smart_strings)  # extensions=None and enable_regexp=False: extensions are copied below instead
+ if self._extensions is not None:
+ context._extensions = self._extensions.copy()
+ return context
+
+ cdef bytes _to_utf(self, s):  # returns None for None, otherwise the cached or newly encoded UTF-8 bytes
+ "Convert to UTF-8 and keep a reference to the encoded string"
+ cdef python.PyObject* dict_result
+ if s is None:
+ return None
+ dict_result = python.PyDict_GetItem(self._utf_refs, s)
+ if dict_result is not NULL:
+ return <bytes>dict_result  # cache hit: reuse the bytes object stored earlier
+ utf = _utf8(s)
+ self._utf_refs[s] = utf
+ if python.IS_PYPY:
+ # use C level refs, PyPy refs are not enough!
+ python.Py_INCREF(utf)  # balanced by the Py_DECREF loop in _cleanup_context()
+ return utf
+
+ cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt) noexcept:  # bind this object to a libxml2 XPath context
+ self._xpathCtxt = xpathCtxt
+ xpathCtxt.userData = <void*>self  # plain pointer, no reference taken here; cleared again in _release_context()
+ # Need a cast here because older libxml2 releases do not use 'const' in the functype.
+ xpathCtxt.error = <xmlerror.xmlStructuredErrorFunc> _receiveXPathError
+
+ @cython.final
+ cdef _register_context(self, _Document doc):  # remember the document for the coming evaluation
+ self._doc = doc
+ self._exc.clear()  # reset the exception context before evaluating
+
+ @cython.final
+ cdef _cleanup_context(self):  # drop cached UTF-8 strings, the eval_context dict and the document reference
+ #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
+ #self.unregisterGlobalNamespaces()
+ if python.IS_PYPY:
+ # clean up double refs in PyPy (see "_to_utf()" method)
+ for ref in self._utf_refs.itervalues():
+ python.Py_DECREF(ref)
+ self._utf_refs.clear()
+ self._eval_context_dict = None
+ self._doc = None
+
+ @cython.final
+ cdef _release_context(self):  # detach from the libxml2 context, if one is set
+ if self._xpathCtxt is not NULL:
+ self._xpathCtxt.userData = NULL  # clear the back-pointer installed by _set_xpath_context()
+ self._xpathCtxt = NULL
+
+ # namespaces (internal UTF-8 methods with leading '_')
+
+ cdef addNamespace(self, prefix, ns_uri):  # add or replace a local prefix mapping
+ cdef list namespaces
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ new_item = (prefix_utf, ns_uri_utf)
+ if self._namespaces is None:
+ self._namespaces = [new_item]
+ else:
+ namespaces = []
+ for item in self._namespaces:
+ if item[0] == prefix_utf:
+ item = new_item  # same prefix: the new pair takes the old entry's position
+ new_item = None  # mark as consumed so it is not appended again
+ namespaces.append(item)
+ if new_item is not None:
+ namespaces.append(new_item)
+ self._namespaces = namespaces
+ if self._xpathCtxt is not NULL:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))  # also register with the attached libxml2 context
+
+ cdef registerNamespace(self, prefix, ns_uri):  # register a prefix with libxml2 and track it in _global_namespaces
+ if prefix is None:
+ raise TypeError, "empty prefix is not supported in XPath"
+ prefix_utf = self._to_utf(prefix)
+ ns_uri_utf = self._to_utf(ns_uri)
+ self._global_namespaces.append(prefix_utf)
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _xcstr(prefix_utf), _xcstr(ns_uri_utf))  # NOTE(review): no NULL check on _xpathCtxt here, unlike addNamespace()
+
+ cdef registerLocalNamespaces(self):  # register every entry of self._namespaces with the libxml2 context
+ if self._namespaces is None:
+ return
+ for prefix_utf, ns_uri_utf in self._namespaces:
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))
+
+ cdef registerGlobalNamespaces(self):  # register the prefixes returned by _find_all_extension_prefixes()
+ cdef list ns_prefixes = _find_all_extension_prefixes()
+ if python.PyList_GET_SIZE(ns_prefixes) > 0:
+ for prefix_utf, ns_uri_utf in ns_prefixes:
+ self._global_namespaces.append(prefix_utf)  # remembered so unregisterGlobalNamespaces() can undo it
+ xpath.xmlXPathRegisterNs(
+ self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))
+
+ cdef unregisterGlobalNamespaces(self):  # unregister all tracked global prefixes and forget them
+ if python.PyList_GET_SIZE(self._global_namespaces) > 0:
+ for prefix_utf in self._global_namespaces:
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _xcstr(prefix_utf), NULL)  # a NULL URI asks libxml2 to drop the prefix registration
+ del self._global_namespaces[:]
+
+ cdef void _unregisterNamespace(self, prefix_utf) noexcept:  # drop a single prefix registration from the libxml2 context
+ xpath.xmlXPathRegisterNs(self._xpathCtxt,
+ _xcstr(prefix_utf), NULL)  # does not touch self._namespaces or self._global_namespaces
+
+ # extension functions
+
+ cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1:  # store function under (ns_utf, name_utf)
+ if self._extensions is None:
+ self._extensions = {}  # created on first use
+ self._extensions[(ns_utf, name_utf)] = function
+ return 0
+
+ cdef registerGlobalFunctions(self, void* ctxt,
+ _register_function reg_func):  # cache and register every function in __FUNCTION_NAMESPACE_REGISTRIES
+ cdef python.PyObject* dict_result
+ cdef dict d
+ for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
+ dict_result = python.PyDict_GetItem(
+ self._function_cache, ns_utf)
+ if dict_result is not NULL:
+ d = <dict>dict_result
+ else:
+ d = {}
+ self._function_cache[ns_utf] = d  # first function seen for this namespace
+ for name_utf, function in ns_functions.iteritems():
+ d[name_utf] = function
+ reg_func(ctxt, name_utf, ns_utf)  # the callback's int result is ignored
+
+ cdef registerLocalFunctions(self, void* ctxt,
+ _register_function reg_func):  # cache and register every function in self._extensions
+ cdef python.PyObject* dict_result
+ cdef dict d
+ if self._extensions is None:
+ return # done
+ last_ns = None
+ d = None
+ for (ns_utf, name_utf), function in self._extensions.iteritems():
+ if ns_utf is not last_ns or d is None:  # re-fetch the per-namespace dict only when the namespace object changes
+ last_ns = ns_utf
+ dict_result = python.PyDict_GetItem(
+ self._function_cache, ns_utf)
+ if dict_result is not NULL:
+ d = <dict>dict_result
+ else:
+ d = {}
+ self._function_cache[ns_utf] = d
+ d[name_utf] = function
+ reg_func(ctxt, name_utf, ns_utf)  # the callback's int result is ignored
+
+ cdef unregisterAllFunctions(self, void* ctxt,
+ _register_function unreg_func):  # call unreg_func for every cached function; the cache itself is kept
+ for ns_utf, functions in self._function_cache.iteritems():
+ for name_utf in functions:
+ unreg_func(ctxt, name_utf, ns_utf)
+
+ cdef unregisterGlobalFunctions(self, void* ctxt,
+ _register_function unreg_func):  # like unregisterAllFunctions(), but skips functions listed in self._extensions
+ for ns_utf, functions in self._function_cache.items():
+ for name_utf in functions:
+ if self._extensions is None or \
+ (ns_utf, name_utf) not in self._extensions:
+ unreg_func(ctxt, name_utf, ns_utf)  # cached but not a local extension
+
+ @cython.final
+ cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name):  # returns the function or None
+ """Lookup an extension function in the cache and return it.
+
+ Parameters: c_ns_uri may be NULL, c_name must not be NULL
+ """
+ cdef python.PyObject* c_dict
+ cdef python.PyObject* dict_result
+ c_dict = python.PyDict_GetItem(
+ self._function_cache, None if c_ns_uri is NULL else c_ns_uri)  # a NULL namespace is looked up under the None key
+ if c_dict is not NULL:
+ dict_result = python.PyDict_GetItem(
+ <object>c_dict, <unsigned char*>c_name)
+ if dict_result is not NULL:
+ return <object>dict_result
+ return None  # namespace or name not cached
+
+ # Python access to the XPath context for extension functions
+
+ @property
+ def context_node(self):  # current XPath context node as an element; raises XPathError if unavailable
+ cdef xmlNode* c_node
+ if self._xpathCtxt is NULL:
+ raise XPathError, \
+ "XPath context is only usable during the evaluation"
+ c_node = self._xpathCtxt.node
+ if c_node is NULL:
+ raise XPathError, "no context node"
+ if c_node.doc != self._xpathCtxt.doc:  # node belongs to a different libxml2 document than the context
+ raise XPathError, \
+ "document-external context nodes are not supported"
+ if self._doc is None:
+ raise XPathError, "document context is missing"
+ return _elementFactory(self._doc, c_node)
+
+ @property
+ def eval_context(self):  # dict created on first access; reset to None by __init__ and _cleanup_context()
+ if self._eval_context_dict is None:
+ self._eval_context_dict = {}
+ return self._eval_context_dict
+
+ # Python reference keeping during XPath function evaluation
+
+ @cython.final
+ cdef _release_temp_refs(self):  # empties both stores that _hold() fills
+ "Free temporarily referenced objects from this context."
+ self._temp_refs.clear()
+ self._temp_documents.clear()
+
+ @cython.final
+ cdef _hold(self, obj):  # accepts a single element or a sequence; anything else is ignored
+ """A way to temporarily hold references to nodes in the evaluator.
+
+ This is needed because otherwise nodes created in XPath extension
+ functions would be reference counted too soon, during the XPath
+ evaluation. This is most important in the case of exceptions.
+ """
+ cdef _Element element  # NOTE(review): only referenced by the commented-out debug prints below
+ if isinstance(obj, _Element):
+ self._temp_refs.add(obj)
+ self._temp_documents.add((<_Element>obj)._doc)  # the element's document is tracked as well
+ return
+ elif _isString(obj) or not python.PySequence_Check(obj):  # strings and non-sequences: nothing to hold
+ return
+ for o in obj:  # sequence: hold each contained element, one level deep
+ if isinstance(o, _Element):
+ #print "Holding element:", <int>element._c_node
+ self._temp_refs.add(o)
+ #print "Holding document:", <int>element._doc._c_doc
+ self._temp_documents.add((<_Element>o)._doc)
+
+ @cython.final
+ cdef _Document _findDocumentForNode(self, xmlNode* c_node):  # searches _temp_documents; returns None if no match
+ """If an XPath expression returns an element from a different
+ document than the current context document, we call this to
+ see if it was possibly created by an extension and is a known
+ document instance.
+ """
+ cdef _Document doc
+ for doc in self._temp_documents:
+ if doc is not None and doc._c_doc is c_node.doc:  # match on the underlying libxml2 document pointer
+ return doc
+ return None
+
+
+# libxml2 keeps these error messages in a static array in its code
+# and doesn't give us access to them ...
+
+cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = (  # indexed by (error code - XML_XPATH_EXPRESSION_OK) in _forwardXPathError()
+ b"Ok",
+ b"Number encoding",
+ b"Unfinished literal",
+ b"Start of literal",
+ b"Expected $ for variable reference",
+ b"Undefined variable",
+ b"Invalid predicate",
+ b"Invalid expression",
+ b"Missing closing curly brace",
+ b"Unregistered function",
+ b"Invalid operand",
+ b"Invalid type",
+ b"Invalid number of arguments",
+ b"Invalid context size",
+ b"Invalid context position",
+ b"Memory allocation error",
+ b"Syntax error",
+ b"Resource error",
+ b"Sub resource error",
+ b"Undefined namespace prefix",
+ b"Encoding error",
+ b"Char out of XML range",
+ b"Invalid or incomplete context",
+ b"Stack usage error",
+ b"Forbidden variable\n",  # NOTE(review): the last two entries carry a trailing newline, the others do not
+ b"?? Unknown error ??\n",
+)
+
+cdef void _forwardXPathError(void* c_ctxt, const xmlerror.xmlError* c_error) noexcept with gil:  # copy the error and hand it to the context's error log
+ cdef xmlerror.xmlError error  # NOTE(review): only the fields assigned below are set on this local struct
+ cdef int xpath_code
+ if c_error.message is not NULL:
+ error.message = c_error.message
+ else:
+ xpath_code = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK  # index into the message table above
+ if 0 <= xpath_code < len(LIBXML2_XPATH_ERROR_MESSAGES):
+ error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[xpath_code])
+ else:
+ error.message = b"unknown error"
+ error.domain = c_error.domain
+ error.code = c_error.code
+ error.level = c_error.level
+ error.line = c_error.line
+ error.int2 = c_error.int1 # column
+ error.file = c_error.file
+ error.node = NULL
+
+ (<_BaseContext>c_ctxt)._error_log._receive(&error)  # c_ctxt is treated as a _BaseContext (see userData in _set_xpath_context())
+
+cdef void _receiveXPathError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:  # error callback installed by _BaseContext._set_xpath_context()
+ if not __DEBUG:
+ return  # errors are only forwarded when __DEBUG is set
+ if c_context is NULL:
+ _forwardError(NULL, error)  # no context attached: use the generic forwarder
+ else:
+ _forwardXPathError(c_context, error)
+
+
+def Extension(module, function_mapping=None, *, ns=None):
+    """Extension(module, function_mapping=None, ns=None)
+
+    Collect attributes of ``module`` (a module or any other object) into
+    a dictionary that maps ``(ns, xpath_function_name)`` to the attribute
+    value.
+
+    ``function_mapping`` selects what is collected:
+
+    * a dict maps attribute names to the XPath function names to use,
+    * any other iterable lists attribute names that are used unchanged,
+    * ``None`` (the default) takes every name from ``dir(module)`` that
+      does not start with an underscore.
+
+    The ``ns`` keyword argument accepts a namespace URI for the XPath
+    functions.
+    """
+    cdef dict functions = {}
+    if isinstance(function_mapping, dict):
+        # explicit renaming: attribute name -> XPath function name
+        for attr_name, xpath_name in function_mapping.items():
+            functions[(ns, xpath_name)] = getattr(module, attr_name)
+        return functions
+
+    if function_mapping is None:
+        function_mapping = [ name for name in dir(module)
+                             if not name.startswith('_') ]
+    for attr_name in function_mapping:
+        functions[(ns, attr_name)] = getattr(module, attr_name)
+    return functions
+
+################################################################################
+# EXSLT regexp implementation
+
+@cython.final
+@cython.internal
+cdef class _ExsltRegExp:  # EXSLT regular-expression functions: test(), match(), replace()
+ cdef dict _compile_map  # {(pattern, ignore_case): compiled regex}, filled by _compile()
+ def __cinit__(self):
+ self._compile_map = {}
+
+ cdef _make_string(self, value):  # coerce an extension-function argument to a string
+ if _isString(value):
+ return value
+ elif isinstance(value, list):
+ # node set: take recursive text concatenation of first element
+ if python.PyList_GET_SIZE(value) == 0:
+ return ''  # empty list gives the empty string
+ firstnode = value[0]  # only the first entry is used
+ if _isString(firstnode):
+ return firstnode
+ elif isinstance(firstnode, _Element):
+ c_text = tree.xmlNodeGetContent((<_Element>firstnode)._c_node)
+ if c_text is NULL:
+ raise MemoryError()
+ try:
+ return funicode(c_text)
+ finally:
+ tree.xmlFree(c_text)  # the buffer is released here once converted
+ else:
+ return unicode(firstnode)
+ else:
+ return unicode(value)
+
+ cdef _compile(self, rexp, ignore_case):  # compile rexp with re.UNICODE (plus IGNORECASE), cached per instance
+ cdef python.PyObject* c_result
+ rexp = self._make_string(rexp)
+ key = (rexp, ignore_case)
+ c_result = python.PyDict_GetItem(self._compile_map, key)
+ if c_result is not NULL:
+ return <object>c_result  # already compiled with the same flag
+ py_flags = re.UNICODE
+ if ignore_case:
+ py_flags = py_flags | re.IGNORECASE
+ rexp_compiled = re.compile(rexp, py_flags)
+ self._compile_map[key] = rexp_compiled
+ return rexp_compiled
+
+ def test(self, ctxt, s, rexp, flags=''):
+     """Return True if the regular expression matches anywhere in ``s``.
+
+     ``flags`` and ``s`` are converted with ``_make_string()``; an 'i' in
+     ``flags`` requests a case-insensitive match.  ``ctxt`` is not used.
+     """
+     flag_string = self._make_string(flags)
+     subject = self._make_string(s)
+     pattern = self._compile(rexp, 'i' in flag_string)
+     return pattern.search(subject) is not None
+
+ def match(self, ctxt, s, rexp, flags=''):  # returns a list of <match> elements, or () when nothing matches
+ cdef list result_list
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ if 'g' in flags:
+ results = rexpc.findall(s)  # global mode: every match in the string
+ if not results:
+ return ()
+ else:
+ result = rexpc.search(s)  # non-global mode: first match plus its groups
+ if not result:
+ return ()
+ results = [ result.group() ]
+ results.extend( result.groups('') )  # unmatched groups become ''
+ result_list = []
+ root = Element('matches')
+ for s_match in results:
+ if python.PyTuple_CheckExact(s_match):
+ s_match = ''.join(s_match)  # tuple entries are concatenated into one string
+ elem = SubElement(root, 'match')
+ elem.text = s_match
+ result_list.append(elem)
+ return result_list
+
+ def replace(self, ctxt, s, rexp, flags, replacement):  # 'g' in flags replaces all matches, otherwise only the first
+ replacement = self._make_string(replacement)
+ flags = self._make_string(flags)
+ s = self._make_string(s)
+ rexpc = self._compile(rexp, 'i' in flags)
+ count: object = 0 if 'g' in flags else 1  # re.sub() treats count=0 as "replace all"
+ return rexpc.sub(replacement, s, count)
+
+ cdef _register_in_context(self, _BaseContext context):  # add test/match/replace as local extension functions
+ ns = b"http://exslt.org/regular-expressions"
+ context._addLocalExtensionFunction(ns, b"test", self.test)
+ context._addLocalExtensionFunction(ns, b"match", self.match)
+ context._addLocalExtensionFunction(ns, b"replace", self.replace)
+
+
+################################################################################
+# helper functions
+
+cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc,
+ _BaseContext context) except NULL:  # convert a Python value into a new libxml2 XPath object
+ cdef xpath.xmlNodeSet* resultSet
+ cdef _Element fake_node = None
+ cdef xmlNode* c_node
+
+ if isinstance(obj, unicode):
+ obj = _utf8(obj)
+ if isinstance(obj, bytes):
+ # libxml2 copies the string value
+ return xpath.xmlXPathNewCString(_cstr(obj))
+ if isinstance(obj, bool):  # tested before the generic number check below
+ return xpath.xmlXPathNewBoolean(obj)
+ if python.PyNumber_Check(obj):
+ return xpath.xmlXPathNewFloat(obj)
+ if obj is None:
+ resultSet = xpath.xmlXPathNodeSetCreate(NULL)  # None becomes an empty node set
+ elif isinstance(obj, _Element):
+ resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node)
+ elif python.PySequence_Check(obj):
+ resultSet = xpath.xmlXPathNodeSetCreate(NULL)
+ try:
+ for value in obj:
+ if isinstance(value, _Element):
+ if context is not None:
+ context._hold(value)
+ xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node)
+ else:
+ if context is None or doc is None:
+ raise XPathResultError, \
+ f"Non-Element values not supported at this point - got {value!r}"
+ # support strings by appending text nodes to an Element
+ if isinstance(value, unicode):
+ value = _utf8(value)
+ if isinstance(value, bytes):
+ if fake_node is None:
+ fake_node = _makeElement("text-root", NULL, doc, None,
+ None, None, None, None, None)
+ context._hold(fake_node)  # held by the context while libxml2 uses the text nodes
+ else:
+ # append a comment node to keep the text nodes separate
+ c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"")
+ if c_node is NULL:
+ raise MemoryError()
+ tree.xmlAddChild(fake_node._c_node, c_node)
+ context._hold(value)  # NOTE(review): value is bytes here, for which _hold() stores nothing
+ c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value))
+ if c_node is NULL:
+ raise MemoryError()
+ tree.xmlAddChild(fake_node._c_node, c_node)
+ xpath.xmlXPathNodeSetAdd(resultSet, c_node)
+ else:
+ raise XPathResultError, \
+ f"This is not a supported node-set result: {value!r}"
+ except:
+ xpath.xmlXPathFreeNodeSet(resultSet)  # release the partially built node set before re-raising
+ raise
+ else:
+ raise XPathResultError, f"Unknown return type: {python._fqtypename(obj).decode('utf8')}"
+ return xpath.xmlXPathWrapNodeSet(resultSet)
+
+cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
+ _Document doc, _BaseContext context):  # convert a libxml2 XPath object into a Python value
+ if xpathObj.type == xpath.XPATH_UNDEFINED:
+ raise XPathResultError, "Undefined xpath result"
+ elif xpathObj.type == xpath.XPATH_NODESET:
+ return _createNodeSetResult(xpathObj, doc, context)
+ elif xpathObj.type == xpath.XPATH_BOOLEAN:
+ return xpathObj.boolval
+ elif xpathObj.type == xpath.XPATH_NUMBER:
+ return xpathObj.floatval
+ elif xpathObj.type == xpath.XPATH_STRING:
+ stringval = funicode(xpathObj.stringval)
+ if context._build_smart_strings:
+ stringval = _elementStringResultFactory(
+ stringval, None, None, False)  # plain string result: no parent, no attribute name, not a tail
+ return stringval
+ elif xpathObj.type == xpath.XPATH_POINT:
+ raise NotImplementedError, "XPATH_POINT"
+ elif xpathObj.type == xpath.XPATH_RANGE:
+ raise NotImplementedError, "XPATH_RANGE"
+ elif xpathObj.type == xpath.XPATH_LOCATIONSET:
+ raise NotImplementedError, "XPATH_LOCATIONSET"
+ elif xpathObj.type == xpath.XPATH_USERS:
+ raise NotImplementedError, "XPATH_USERS"
+ elif xpathObj.type == xpath.XPATH_XSLT_TREE:
+ return _createNodeSetResult(xpathObj, doc, context)  # result tree fragments go through the node-set path
+ else:
+ raise XPathResultError, f"Unknown xpath result {xpathObj.type}"
+
+cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
+                                 _BaseContext context):
+    """Return the entries of an XPath node set as a new Python list.
+
+    Each node is converted by ``_unpackNodeSetEntry()``.  A NULL node set
+    gives an empty list.  Objects of type XPATH_XSLT_TREE are unpacked as
+    result tree fragments.
+    """
+    cdef int index
+    cdef list unpacked = []
+    cdef bint is_fragment = xpathObj.type == xpath.XPATH_XSLT_TREE
+    if xpathObj.nodesetval is not NULL:
+        for index in range(xpathObj.nodesetval.nodeNr):
+            _unpackNodeSetEntry(unpacked, xpathObj.nodesetval.nodeTab[index],
+                                doc, context, is_fragment)
+    return unpacked
+
+cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
+                         _BaseContext context, bint is_fragment):
+    """Append the Python representation of one node-set entry to ``results``.
+
+    * element nodes are appended as elements; an element from a foreign
+      document without ``_private`` data is copied into ``doc`` first
+    * text, CDATA and attribute nodes are appended as string results
+    * namespace declarations are appended as ``(prefix, href)`` tuples
+    * document nodes are skipped, except that for result tree fragments
+      (``is_fragment``) their children are unpacked recursively
+    * XInclude start/end markers are skipped
+
+    Raises MemoryError if copying a foreign element fails and
+    NotImplementedError for any other node type.
+    """
+    cdef xmlNode* c_child
+    if _isElement(c_node):
+        if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
+            # XXX: works, but maybe not always the right thing to do?
+            # XPath: only runs when extensions create or copy trees
+            #        -> we store Python refs to these, so that is OK
+            # XSLT: can it leak when merging trees from multiple sources?
+            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+            if c_node is NULL:
+                # xmlDocCopyNode() reports failure with NULL; do not pass
+                # that on to the element factory
+                raise MemoryError()
+        # FIXME: call _instantiateElementFromXPath() instead?
+        results.append(
+            _fakeDocElementFactory(doc, c_node))
+    elif c_node.type == tree.XML_TEXT_NODE or \
+            c_node.type == tree.XML_CDATA_SECTION_NODE or \
+            c_node.type == tree.XML_ATTRIBUTE_NODE:
+        results.append(
+            _buildElementStringResult(doc, c_node, context))
+    elif c_node.type == tree.XML_NAMESPACE_DECL:
+        results.append( (funicodeOrNone((<xmlNs*>c_node).prefix),
+                         funicodeOrNone((<xmlNs*>c_node).href)) )
+    elif c_node.type == tree.XML_DOCUMENT_NODE or \
+            c_node.type == tree.XML_HTML_DOCUMENT_NODE:
+        # ignored for everything but result tree fragments
+        if is_fragment:
+            c_child = c_node.children
+            while c_child is not NULL:
+                _unpackNodeSetEntry(results, c_child, doc, context, 0)
+                c_child = c_child.next
+    elif c_node.type == tree.XML_XINCLUDE_START or \
+            c_node.type == tree.XML_XINCLUDE_END:
+        pass
+    else:
+        raise NotImplementedError, \
+            f"Not yet implemented result node type: {c_node.type}"
+
+cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj) noexcept:  # xpathObj must not be NULL (it is dereferenced)
+ """Free the XPath object, but *never* free the *content* of node sets.
+ Python dealloc will do that for us.
+ """
+ if xpathObj.nodesetval is not NULL:
+ xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval)  # free the node set separately first
+ xpathObj.nodesetval = NULL  # so xmlXPathFreeObject() below no longer sees it
+ xpath.xmlXPathFreeObject(xpathObj)
+
+cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
+                                           _BaseContext context):
+    """Return an element object for a node found by an XPath expression.
+
+    A node from ``doc`` (or from a document with ``_private`` data) is
+    wrapped directly.  A node from another document is wrapped with that
+    document if ``context._findDocumentForNode()`` knows it; otherwise the
+    node is copied into ``doc``.
+
+    Raises MemoryError if that copy fails.
+    """
+    # NOTE: this may copy the element - only call this when it can't leak
+    if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
+        # not from the context document and not from a fake document
+        # either => may still be from a known document, e.g. one
+        # created by an extension function
+        node_doc = context._findDocumentForNode(c_node)
+        if node_doc is None:
+            # not from a known document at all! => can only make a
+            # safety copy here
+            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+            if c_node is NULL:
+                # xmlDocCopyNode() reports failure with NULL; do not pass
+                # that on to the element factory
+                raise MemoryError()
+        else:
+            doc = node_doc
+    return _fakeDocElementFactory(doc, c_node)
+
+################################################################################
+# special str/unicode subclasses
+
+@cython.final
+cdef class _ElementUnicodeResult(unicode):  # unicode subclass that records where an XPath string result came from
+ cdef _Element _parent  # element returned by getparent(); may be None
+ cdef readonly object attrname  # attribute name for attribute results, otherwise None
+ cdef readonly bint is_tail  # set by _elementStringResultFactory()
+
+ def getparent(self):
+ return self._parent
+
+ @property
+ def is_text(self):  # true when there is a parent and the value is neither tail text nor an attribute
+ return self._parent is not None and not (self.is_tail or self.attrname is not None)
+
+ @property
+ def is_attribute(self):
+ return self.attrname is not None
+
+cdef object _elementStringResultFactory(string_value, _Element parent,
+ attrname, bint is_tail):  # build an _ElementUnicodeResult and fill in its origin fields
+ result = _ElementUnicodeResult(string_value)
+ result._parent = parent
+ result.is_tail = is_tail
+ result.attrname = attrname
+ return result
+
+cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
+                                      _BaseContext context):
+    """Build the Python string result for a text, CDATA or attribute node.
+
+    Returns a plain string when ``context._build_smart_strings`` is false.
+    Otherwise returns the result of ``_elementStringResultFactory()``,
+    which records the parent element, the attribute name (attribute
+    nodes only) and whether the text is tail text.
+
+    Raises MemoryError if libxml2 returns no content buffer for an
+    attribute node.
+    """
+    cdef _Element parent = None
+    cdef object attrname = None
+    cdef xmlNode* c_element
+    cdef bint is_tail
+
+    if c_node.type == tree.XML_ATTRIBUTE_NODE:
+        attrname = _namespacedName(c_node)
+        is_tail = 0
+        s = tree.xmlNodeGetContent(c_node)
+        if s is NULL:
+            # same guard as in _ExsltRegExp._make_string(): never pass a
+            # NULL buffer to funicode() / xmlFree()
+            raise MemoryError()
+        try:
+            value = funicode(s)
+        finally:
+            tree.xmlFree(s)
+        c_element = NULL
+    else:
+        #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type"
+        # may be tail text or normal text
+        value = funicode(c_node.content)
+        c_element = _previousElement(c_node)
+        is_tail = c_element is not NULL
+
+    if not context._build_smart_strings:
+        return value
+
+    if c_element is NULL:
+        # non-tail text or attribute text
+        c_element = c_node.parent
+        # walk up to the nearest ancestor that is an element
+        while c_element is not NULL and not _isElement(c_element):
+            c_element = c_element.parent
+
+    if c_element is not NULL:
+        parent = _instantiateElementFromXPath(c_element, doc, context)
+
+    return _elementStringResultFactory(
+        value, parent, attrname, is_tail)
+
+################################################################################
+# callbacks for XPath/XSLT extension functions
+
+cdef void _extension_function_call(_BaseContext context, function,
+                                   xpath.xmlXPathParserContext* ctxt, int nargs) noexcept:
+    """Call a Python extension function with arguments from the XPath stack.
+
+    Pops ``nargs`` values from ``ctxt``, converts them to Python objects,
+    calls ``function(context, *args)`` and pushes the wrapped result back
+    onto the stack.  Any exception is reported to libxml2 as
+    XPATH_EXPR_ERROR and stored in ``context._exc``; nothing propagates
+    to the caller.
+    """
+    cdef _Document doc
+    cdef xpath.xmlXPathObject* obj
+    cdef list args
+    cdef int i
+    doc = context._doc
+    try:
+        args = []
+        for i in range(nargs):
+            obj = xpath.valuePop(ctxt)
+            try:
+                o = _unwrapXPathObject(obj, doc, context)
+            finally:
+                # the popped object is no longer on the stack, so free it
+                # here even when the conversion raises (it leaked before)
+                _freeXPathObject(obj)
+            args.append(o)
+        # values were popped from the stack, so restore call order
+        args.reverse()
+
+        res = function(context, *args)
+        # wrap result for XPath consumption
+        obj = _wrapXPathObject(res, doc, context)
+        # prevent Python from deallocating elements handed to libxml2
+        context._hold(res)
+        xpath.valuePush(ctxt, obj)
+    except:
+        xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR)
+        context._exc._store_raised()
+    finally:
+        return  # swallow any further exceptions
+
+# lookup the function by name and call it
+
+cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
+ int nargs) noexcept with gil:  # look up the called function in the context's cache and run it
+ cdef _BaseContext context
+ cdef xpath.xmlXPathContext* rctxt = ctxt.context
+ context = <_BaseContext> rctxt.userData  # userData is assigned in _BaseContext._set_xpath_context()
+ try:
+ function = context._find_cached_function(rctxt.functionURI, rctxt.function)
+ if function is not None:
+ _extension_function_call(context, function, ctxt, nargs)
+ else:
+ xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
+ context._exc._store_exception(XPathFunctionError(
+ f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found"))  # stored, not raised
+ except:
+ # may not be the right error, but we need to tell libxml2 *something*
+ xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
+ context._exc._store_raised()
+ finally:
+ return # swallow any further exceptions