diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/extensions.pxi | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/extensions.pxi')
-rw-r--r-- | .venv/lib/python3.12/site-packages/lxml/extensions.pxi | 833 |
1 files changed, 833 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/extensions.pxi b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi new file mode 100644 index 00000000..2a2c94ec --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi @@ -0,0 +1,833 @@ +# support for extension functions in XPath and XSLT + +cdef class XPathError(LxmlError): + """Base class of all XPath errors. + """ + +cdef class XPathEvalError(XPathError): + """Error during XPath evaluation. + """ + +cdef class XPathFunctionError(XPathEvalError): + """Internal error looking up an XPath extension function. + """ + +cdef class XPathResultError(XPathEvalError): + """Error handling an XPath result. + """ + + +# forward declarations + +ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf) +cdef class _ExsltRegExp + +################################################################################ +# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ... + +@cython.internal +cdef class _BaseContext: + cdef xpath.xmlXPathContext* _xpathCtxt + cdef _Document _doc + cdef dict _extensions + cdef list _namespaces + cdef list _global_namespaces + cdef dict _utf_refs + cdef dict _function_cache + cdef dict _eval_context_dict + cdef bint _build_smart_strings + # for exception handling and temporary reference keeping: + cdef _TempStore _temp_refs + cdef set _temp_documents + cdef _ExceptionContext _exc + cdef _ErrorLog _error_log + + def __cinit__(self): + self._xpathCtxt = NULL + + def __init__(self, namespaces, extensions, error_log, enable_regexp, + build_smart_strings): + cdef _ExsltRegExp _regexp + cdef dict new_extensions + cdef list ns + self._utf_refs = {} + self._global_namespaces = [] + self._function_cache = {} + self._eval_context_dict = None + self._error_log = error_log + + if extensions is not None: + # convert extensions to UTF-8 + if isinstance(extensions, dict): + extensions = (extensions,) + # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function} + new_extensions = {} + for extension in extensions: + for (ns_uri, name), function in extension.items(): + if name is None: + raise ValueError, "extensions must have non empty names" + ns_utf = self._to_utf(ns_uri) + name_utf = self._to_utf(name) + new_extensions[(ns_utf, name_utf)] = function + extensions = new_extensions or None + + if namespaces is not None: + if isinstance(namespaces, dict): + namespaces = namespaces.items() + if namespaces: + ns = [] + for prefix, ns_uri in namespaces: + if prefix is None or not prefix: + raise TypeError, \ + "empty namespace prefix is not supported in XPath" + if ns_uri is None or not ns_uri: + raise TypeError, \ + "setting default namespace is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + ns.append( (prefix_utf, ns_uri_utf) ) + namespaces = ns + else: + namespaces = None + + self._doc = None + self._exc = _ExceptionContext() + self._extensions = extensions + self._namespaces = namespaces + self._temp_refs = _TempStore() + self._temp_documents = set() + self._build_smart_strings = build_smart_strings + + if enable_regexp: + _regexp = _ExsltRegExp() + _regexp._register_in_context(self) + + cdef _BaseContext _copy(self): + cdef _BaseContext context + if self._namespaces is not None: + namespaces = self._namespaces[:] + else: + namespaces = None + context = self.__class__(namespaces, None, self._error_log, False, + self._build_smart_strings) + if self._extensions is not None: + context._extensions = self._extensions.copy() + return context + + cdef bytes _to_utf(self, s): + "Convert to UTF-8 and keep a reference to the encoded string" + cdef python.PyObject* dict_result + if s is None: + return None + dict_result = python.PyDict_GetItem(self._utf_refs, s) + if dict_result is not NULL: + return <bytes>dict_result + utf = _utf8(s) + self._utf_refs[s] = utf + if python.IS_PYPY: + # use C level refs, PyPy refs are not enough! + python.Py_INCREF(utf) + return utf + + cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt) noexcept: + self._xpathCtxt = xpathCtxt + xpathCtxt.userData = <void*>self + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + xpathCtxt.error = <xmlerror.xmlStructuredErrorFunc> _receiveXPathError + + @cython.final + cdef _register_context(self, _Document doc): + self._doc = doc + self._exc.clear() + + @cython.final + cdef _cleanup_context(self): + #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt) + #self.unregisterGlobalNamespaces() + if python.IS_PYPY: + # clean up double refs in PyPy (see "_to_utf()" method) + for ref in self._utf_refs.itervalues(): + python.Py_DECREF(ref) + self._utf_refs.clear() + self._eval_context_dict = None + self._doc = None + + @cython.final + cdef _release_context(self): + if self._xpathCtxt is not NULL: + self._xpathCtxt.userData = NULL + self._xpathCtxt = NULL + + # namespaces (internal UTF-8 methods with leading '_') + + cdef addNamespace(self, prefix, ns_uri): + cdef list namespaces + if prefix is None: + raise TypeError, "empty prefix is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + new_item = (prefix_utf, ns_uri_utf) + if self._namespaces is None: + self._namespaces = [new_item] + else: + namespaces = [] + for item in self._namespaces: + if item[0] == prefix_utf: + item = new_item + new_item = None + namespaces.append(item) + if new_item is not None: + namespaces.append(new_item) + self._namespaces = namespaces + if self._xpathCtxt is not NULL: + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef registerNamespace(self, prefix, ns_uri): + if prefix is None: + raise TypeError, "empty prefix is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + self._global_namespaces.append(prefix_utf) + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef registerLocalNamespaces(self): + if self._namespaces is None: + return + for prefix_utf, ns_uri_utf in self._namespaces: + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef registerGlobalNamespaces(self): + cdef list ns_prefixes = _find_all_extension_prefixes() + if python.PyList_GET_SIZE(ns_prefixes) > 0: + for prefix_utf, ns_uri_utf in ns_prefixes: + self._global_namespaces.append(prefix_utf) + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef unregisterGlobalNamespaces(self): + if python.PyList_GET_SIZE(self._global_namespaces) > 0: + for prefix_utf in self._global_namespaces: + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _xcstr(prefix_utf), NULL) + del self._global_namespaces[:] + + cdef void _unregisterNamespace(self, prefix_utf) noexcept: + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _xcstr(prefix_utf), NULL) + + # extension functions + + cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1: + if self._extensions is None: + self._extensions = {} + self._extensions[(ns_utf, name_utf)] = function + return 0 + + cdef registerGlobalFunctions(self, void* ctxt, + _register_function reg_func): + cdef python.PyObject* dict_result + cdef dict d + for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): + dict_result = python.PyDict_GetItem( + self._function_cache, ns_utf) + if dict_result is not NULL: + d = <dict>dict_result + else: + d = {} + self._function_cache[ns_utf] = d + for name_utf, function in ns_functions.iteritems(): + d[name_utf] = function + reg_func(ctxt, name_utf, ns_utf) + + cdef registerLocalFunctions(self, void* ctxt, + _register_function reg_func): + cdef python.PyObject* dict_result + cdef dict d + if self._extensions is None: + return # done + last_ns = None + d = None + for (ns_utf, name_utf), function in self._extensions.iteritems(): + if ns_utf is not last_ns or d is None: + last_ns = ns_utf + dict_result = python.PyDict_GetItem( + self._function_cache, ns_utf) + if dict_result is not NULL: + d = <dict>dict_result + else: + d = {} + self._function_cache[ns_utf] = d + d[name_utf] = function + reg_func(ctxt, name_utf, ns_utf) + + cdef unregisterAllFunctions(self, void* ctxt, + _register_function unreg_func): + for ns_utf, functions in self._function_cache.iteritems(): + for name_utf in functions: + unreg_func(ctxt, name_utf, ns_utf) + + cdef unregisterGlobalFunctions(self, void* ctxt, + _register_function unreg_func): + for ns_utf, functions in self._function_cache.items(): + for name_utf in functions: + if self._extensions is None or \ + (ns_utf, name_utf) not in self._extensions: + unreg_func(ctxt, name_utf, ns_utf) + + @cython.final + cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name): + """Lookup an extension function in the cache and return it. + + Parameters: c_ns_uri may be NULL, c_name must not be NULL + """ + cdef python.PyObject* c_dict + cdef python.PyObject* dict_result + c_dict = python.PyDict_GetItem( + self._function_cache, None if c_ns_uri is NULL else c_ns_uri) + if c_dict is not NULL: + dict_result = python.PyDict_GetItem( + <object>c_dict, <unsigned char*>c_name) + if dict_result is not NULL: + return <object>dict_result + return None + + # Python access to the XPath context for extension functions + + @property + def context_node(self): + cdef xmlNode* c_node + if self._xpathCtxt is NULL: + raise XPathError, \ + "XPath context is only usable during the evaluation" + c_node = self._xpathCtxt.node + if c_node is NULL: + raise XPathError, "no context node" + if c_node.doc != self._xpathCtxt.doc: + raise XPathError, \ + "document-external context nodes are not supported" + if self._doc is None: + raise XPathError, "document context is missing" + return _elementFactory(self._doc, c_node) + + @property + def eval_context(self): + if self._eval_context_dict is None: + self._eval_context_dict = {} + return self._eval_context_dict + + # Python reference keeping during XPath function evaluation + + @cython.final + cdef _release_temp_refs(self): + "Free temporarily referenced objects from this context." + self._temp_refs.clear() + self._temp_documents.clear() + + @cython.final + cdef _hold(self, obj): + """A way to temporarily hold references to nodes in the evaluator. + + This is needed because otherwise nodes created in XPath extension + functions would be reference counted too soon, during the XPath + evaluation. This is most important in the case of exceptions. + """ + cdef _Element element + if isinstance(obj, _Element): + self._temp_refs.add(obj) + self._temp_documents.add((<_Element>obj)._doc) + return + elif _isString(obj) or not python.PySequence_Check(obj): + return + for o in obj: + if isinstance(o, _Element): + #print "Holding element:", <int>element._c_node + self._temp_refs.add(o) + #print "Holding document:", <int>element._doc._c_doc + self._temp_documents.add((<_Element>o)._doc) + + @cython.final + cdef _Document _findDocumentForNode(self, xmlNode* c_node): + """If an XPath expression returns an element from a different + document than the current context document, we call this to + see if it was possibly created by an extension and is a known + document instance. + """ + cdef _Document doc + for doc in self._temp_documents: + if doc is not None and doc._c_doc is c_node.doc: + return doc + return None + + +# libxml2 keeps these error messages in a static array in its code +# and doesn't give us access to them ... + +cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = ( + b"Ok", + b"Number encoding", + b"Unfinished literal", + b"Start of literal", + b"Expected $ for variable reference", + b"Undefined variable", + b"Invalid predicate", + b"Invalid expression", + b"Missing closing curly brace", + b"Unregistered function", + b"Invalid operand", + b"Invalid type", + b"Invalid number of arguments", + b"Invalid context size", + b"Invalid context position", + b"Memory allocation error", + b"Syntax error", + b"Resource error", + b"Sub resource error", + b"Undefined namespace prefix", + b"Encoding error", + b"Char out of XML range", + b"Invalid or incomplete context", + b"Stack usage error", + b"Forbidden variable\n", + b"?? Unknown error ??\n", +) + +cdef void _forwardXPathError(void* c_ctxt, const xmlerror.xmlError* c_error) noexcept with gil: + cdef xmlerror.xmlError error + cdef int xpath_code + if c_error.message is not NULL: + error.message = c_error.message + else: + xpath_code = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK + if 0 <= xpath_code < len(LIBXML2_XPATH_ERROR_MESSAGES): + error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[xpath_code]) + else: + error.message = b"unknown error" + error.domain = c_error.domain + error.code = c_error.code + error.level = c_error.level + error.line = c_error.line + error.int2 = c_error.int1 # column + error.file = c_error.file + error.node = NULL + + (<_BaseContext>c_ctxt)._error_log._receive(&error) + +cdef void _receiveXPathError(void* c_context, const xmlerror.xmlError* error) noexcept nogil: + if not __DEBUG: + return + if c_context is NULL: + _forwardError(NULL, error) + else: + _forwardXPathError(c_context, error) + + +def Extension(module, function_mapping=None, *, ns=None): + """Extension(module, function_mapping=None, ns=None) + + Build a dictionary of extension functions from the functions + defined in a module or the methods of an object. + + As second argument, you can pass an additional mapping of + attribute names to XPath function names, or a list of function + names that should be taken. + + The ``ns`` keyword argument accepts a namespace URI for the XPath + functions. + """ + cdef dict functions = {} + if isinstance(function_mapping, dict): + for function_name, xpath_name in function_mapping.items(): + functions[(ns, xpath_name)] = getattr(module, function_name) + else: + if function_mapping is None: + function_mapping = [ name for name in dir(module) + if not name.startswith('_') ] + for function_name in function_mapping: + functions[(ns, function_name)] = getattr(module, function_name) + return functions + +################################################################################ +# EXSLT regexp implementation + +@cython.final +@cython.internal +cdef class _ExsltRegExp: + cdef dict _compile_map + def __cinit__(self): + self._compile_map = {} + + cdef _make_string(self, value): + if _isString(value): + return value + elif isinstance(value, list): + # node set: take recursive text concatenation of first element + if python.PyList_GET_SIZE(value) == 0: + return '' + firstnode = value[0] + if _isString(firstnode): + return firstnode + elif isinstance(firstnode, _Element): + c_text = tree.xmlNodeGetContent((<_Element>firstnode)._c_node) + if c_text is NULL: + raise MemoryError() + try: + return funicode(c_text) + finally: + tree.xmlFree(c_text) + else: + return unicode(firstnode) + else: + return unicode(value) + + cdef _compile(self, rexp, ignore_case): + cdef python.PyObject* c_result + rexp = self._make_string(rexp) + key = (rexp, ignore_case) + c_result = python.PyDict_GetItem(self._compile_map, key) + if c_result is not NULL: + return <object>c_result + py_flags = re.UNICODE + if ignore_case: + py_flags = py_flags | re.IGNORECASE + rexp_compiled = re.compile(rexp, py_flags) + self._compile_map[key] = rexp_compiled + return rexp_compiled + + def test(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if rexpc.search(s) is None: + return False + else: + return True + + def match(self, ctxt, s, rexp, flags=''): + cdef list result_list + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + results = rexpc.findall(s) + if not results: + return () + else: + result = rexpc.search(s) + if not result: + return () + results = [ result.group() ] + results.extend( result.groups('') ) + result_list = [] + root = Element('matches') + for s_match in results: + if python.PyTuple_CheckExact(s_match): + s_match = ''.join(s_match) + elem = SubElement(root, 'match') + elem.text = s_match + result_list.append(elem) + return result_list + + def replace(self, ctxt, s, rexp, flags, replacement): + replacement = self._make_string(replacement) + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + count: object = 0 if 'g' in flags else 1 + return rexpc.sub(replacement, s, count) + + cdef _register_in_context(self, _BaseContext context): + ns = b"http://exslt.org/regular-expressions" + context._addLocalExtensionFunction(ns, b"test", self.test) + context._addLocalExtensionFunction(ns, b"match", self.match) + context._addLocalExtensionFunction(ns, b"replace", self.replace) + + +################################################################################ +# helper functions + +cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc, + _BaseContext context) except NULL: + cdef xpath.xmlNodeSet* resultSet + cdef _Element fake_node = None + cdef xmlNode* c_node + + if isinstance(obj, unicode): + obj = _utf8(obj) + if isinstance(obj, bytes): + # libxml2 copies the string value + return xpath.xmlXPathNewCString(_cstr(obj)) + if isinstance(obj, bool): + return xpath.xmlXPathNewBoolean(obj) + if python.PyNumber_Check(obj): + return xpath.xmlXPathNewFloat(obj) + if obj is None: + resultSet = xpath.xmlXPathNodeSetCreate(NULL) + elif isinstance(obj, _Element): + resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node) + elif python.PySequence_Check(obj): + resultSet = xpath.xmlXPathNodeSetCreate(NULL) + try: + for value in obj: + if isinstance(value, _Element): + if context is not None: + context._hold(value) + xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node) + else: + if context is None or doc is None: + raise XPathResultError, \ + f"Non-Element values not supported at this point - got {value!r}" + # support strings by appending text nodes to an Element + if isinstance(value, unicode): + value = _utf8(value) + if isinstance(value, bytes): + if fake_node is None: + fake_node = _makeElement("text-root", NULL, doc, None, + None, None, None, None, None) + context._hold(fake_node) + else: + # append a comment node to keep the text nodes separate + c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"") + if c_node is NULL: + raise MemoryError() + tree.xmlAddChild(fake_node._c_node, c_node) + context._hold(value) + c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value)) + if c_node is NULL: + raise MemoryError() + tree.xmlAddChild(fake_node._c_node, c_node) + xpath.xmlXPathNodeSetAdd(resultSet, c_node) + else: + raise XPathResultError, \ + f"This is not a supported node-set result: {value!r}" + except: + xpath.xmlXPathFreeNodeSet(resultSet) + raise + else: + raise XPathResultError, f"Unknown return type: {python._fqtypename(obj).decode('utf8')}" + return xpath.xmlXPathWrapNodeSet(resultSet) + +cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj, + _Document doc, _BaseContext context): + if xpathObj.type == xpath.XPATH_UNDEFINED: + raise XPathResultError, "Undefined xpath result" + elif xpathObj.type == xpath.XPATH_NODESET: + return _createNodeSetResult(xpathObj, doc, context) + elif xpathObj.type == xpath.XPATH_BOOLEAN: + return xpathObj.boolval + elif xpathObj.type == xpath.XPATH_NUMBER: + return xpathObj.floatval + elif xpathObj.type == xpath.XPATH_STRING: + stringval = funicode(xpathObj.stringval) + if context._build_smart_strings: + stringval = _elementStringResultFactory( + stringval, None, None, False) + return stringval + elif xpathObj.type == xpath.XPATH_POINT: + raise NotImplementedError, "XPATH_POINT" + elif xpathObj.type == xpath.XPATH_RANGE: + raise NotImplementedError, "XPATH_RANGE" + elif xpathObj.type == xpath.XPATH_LOCATIONSET: + raise NotImplementedError, "XPATH_LOCATIONSET" + elif xpathObj.type == xpath.XPATH_USERS: + raise NotImplementedError, "XPATH_USERS" + elif xpathObj.type == xpath.XPATH_XSLT_TREE: + return _createNodeSetResult(xpathObj, doc, context) + else: + raise XPathResultError, f"Unknown xpath result {xpathObj.type}" + +cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc, + _BaseContext context): + cdef xmlNode* c_node + cdef int i + cdef list result + result = [] + if xpathObj.nodesetval is NULL: + return result + for i in range(xpathObj.nodesetval.nodeNr): + c_node = xpathObj.nodesetval.nodeTab[i] + _unpackNodeSetEntry(result, c_node, doc, context, + xpathObj.type == xpath.XPATH_XSLT_TREE) + return result + +cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc, + _BaseContext context, bint is_fragment): + cdef xmlNode* c_child + if _isElement(c_node): + if c_node.doc != doc._c_doc and c_node.doc._private is NULL: + # XXX: works, but maybe not always the right thing to do? + # XPath: only runs when extensions create or copy trees + # -> we store Python refs to these, so that is OK + # XSLT: can it leak when merging trees from multiple sources? + c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + # FIXME: call _instantiateElementFromXPath() instead? + results.append( + _fakeDocElementFactory(doc, c_node)) + elif c_node.type == tree.XML_TEXT_NODE or \ + c_node.type == tree.XML_CDATA_SECTION_NODE or \ + c_node.type == tree.XML_ATTRIBUTE_NODE: + results.append( + _buildElementStringResult(doc, c_node, context)) + elif c_node.type == tree.XML_NAMESPACE_DECL: + results.append( (funicodeOrNone((<xmlNs*>c_node).prefix), + funicodeOrNone((<xmlNs*>c_node).href)) ) + elif c_node.type == tree.XML_DOCUMENT_NODE or \ + c_node.type == tree.XML_HTML_DOCUMENT_NODE: + # ignored for everything but result tree fragments + if is_fragment: + c_child = c_node.children + while c_child is not NULL: + _unpackNodeSetEntry(results, c_child, doc, context, 0) + c_child = c_child.next + elif c_node.type == tree.XML_XINCLUDE_START or \ + c_node.type == tree.XML_XINCLUDE_END: + pass + else: + raise NotImplementedError, \ + f"Not yet implemented result node type: {c_node.type}" + +cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj) noexcept: + """Free the XPath object, but *never* free the *content* of node sets. + Python dealloc will do that for us. + """ + if xpathObj.nodesetval is not NULL: + xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval) + xpathObj.nodesetval = NULL + xpath.xmlXPathFreeObject(xpathObj) + +cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc, + _BaseContext context): + # NOTE: this may copy the element - only call this when it can't leak + if c_node.doc != doc._c_doc and c_node.doc._private is NULL: + # not from the context document and not from a fake document + # either => may still be from a known document, e.g. one + # created by an extension function + node_doc = context._findDocumentForNode(c_node) + if node_doc is None: + # not from a known document at all! => can only make a + # safety copy here + c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + else: + doc = node_doc + return _fakeDocElementFactory(doc, c_node) + +################################################################################ +# special str/unicode subclasses + +@cython.final +cdef class _ElementUnicodeResult(unicode): + cdef _Element _parent + cdef readonly object attrname + cdef readonly bint is_tail + + def getparent(self): + return self._parent + + @property + def is_text(self): + return self._parent is not None and not (self.is_tail or self.attrname is not None) + + @property + def is_attribute(self): + return self.attrname is not None + +cdef object _elementStringResultFactory(string_value, _Element parent, + attrname, bint is_tail): + result = _ElementUnicodeResult(string_value) + result._parent = parent + result.is_tail = is_tail + result.attrname = attrname + return result + +cdef object _buildElementStringResult(_Document doc, xmlNode* c_node, + _BaseContext context): + cdef _Element parent = None + cdef object attrname = None + cdef xmlNode* c_element + cdef bint is_tail + + if c_node.type == tree.XML_ATTRIBUTE_NODE: + attrname = _namespacedName(c_node) + is_tail = 0 + s = tree.xmlNodeGetContent(c_node) + try: + value = funicode(s) + finally: + tree.xmlFree(s) + c_element = NULL + else: + #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type" + # may be tail text or normal text + value = funicode(c_node.content) + c_element = _previousElement(c_node) + is_tail = c_element is not NULL + + if not context._build_smart_strings: + return value + + if c_element is NULL: + # non-tail text or attribute text + c_element = c_node.parent + while c_element is not NULL and not _isElement(c_element): + c_element = c_element.parent + + if c_element is not NULL: + parent = _instantiateElementFromXPath(c_element, doc, context) + + return _elementStringResultFactory( + value, parent, attrname, is_tail) + +################################################################################ +# callbacks for XPath/XSLT extension functions + +cdef void _extension_function_call(_BaseContext context, function, + xpath.xmlXPathParserContext* ctxt, int nargs) noexcept: + cdef _Document doc + cdef xpath.xmlXPathObject* obj + cdef list args + cdef int i + doc = context._doc + try: + args = [] + for i in range(nargs): + obj = xpath.valuePop(ctxt) + o = _unwrapXPathObject(obj, doc, context) + _freeXPathObject(obj) + args.append(o) + args.reverse() + + res = function(context, *args) + # wrap result for XPath consumption + obj = _wrapXPathObject(res, doc, context) + # prevent Python from deallocating elements handed to libxml2 + context._hold(res) + xpath.valuePush(ctxt, obj) + except: + xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) + context._exc._store_raised() + finally: + return # swallow any further exceptions + +# lookup the function by name and call it + +cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, + int nargs) noexcept with gil: + cdef _BaseContext context + cdef xpath.xmlXPathContext* rctxt = ctxt.context + context = <_BaseContext> rctxt.userData + try: + function = context._find_cached_function(rctxt.functionURI, rctxt.function) + if function is not None: + _extension_function_call(context, function, ctxt, nargs) + else: + xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR) + context._exc._store_exception(XPathFunctionError( + f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found")) + except: + # may not be the right error, but we need to tell libxml2 *something* + xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR) + context._exc._store_raised() + finally: + return # swallow any further exceptions |