two version of R2R are here HEAD master

author: S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/extensions.pxi
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
1 files changed, 833 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/extensions.pxi b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi
new file mode 100644
index 00000000..2a2c94ec
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi
@@ -0,0 +1,833 @@
+# support for extension functions in XPath and XSLT
+
+cdef class XPathError(LxmlError):
+    """Base class of all XPath errors.
+    """
+
+cdef class XPathEvalError(XPathError):
+    """Error during XPath evaluation.
+    """
+
+cdef class XPathFunctionError(XPathEvalError):
+    """Internal error looking up an XPath extension function.
+    """
+
+cdef class XPathResultError(XPathEvalError):
+    """Error handling an XPath result.
+    """
+
+
+# forward declarations
+
+ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf)
+cdef class _ExsltRegExp
+
+################################################################################
+# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ...
+
+@cython.internal
+cdef class _BaseContext:
+    cdef xpath.xmlXPathContext* _xpathCtxt
+    cdef _Document _doc
+    cdef dict _extensions
+    cdef list _namespaces
+    cdef list _global_namespaces
+    cdef dict _utf_refs
+    cdef dict _function_cache
+    cdef dict _eval_context_dict
+    cdef bint _build_smart_strings
+    # for exception handling and temporary reference keeping:
+    cdef _TempStore _temp_refs
+    cdef set _temp_documents
+    cdef _ExceptionContext _exc
+    cdef _ErrorLog _error_log
+
+    def __cinit__(self):
+        self._xpathCtxt = NULL
+
+    def __init__(self, namespaces, extensions, error_log, enable_regexp,
+                 build_smart_strings):
+        cdef _ExsltRegExp _regexp 
+        cdef dict new_extensions
+        cdef list ns
+        self._utf_refs = {}
+        self._global_namespaces = []
+        self._function_cache = {}
+        self._eval_context_dict = None
+        self._error_log = error_log
+
+        if extensions is not None:
+            # convert extensions to UTF-8
+            if isinstance(extensions, dict):
+                extensions = (extensions,)
+            # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function}
+            new_extensions = {}
+            for extension in extensions:
+                for (ns_uri, name), function in extension.items():
+                    if name is None:
+                        raise ValueError, "extensions must have non empty names"
+                    ns_utf   = self._to_utf(ns_uri)
+                    name_utf = self._to_utf(name)
+                    new_extensions[(ns_utf, name_utf)] = function
+            extensions = new_extensions or None
+
+        if namespaces is not None:
+            if isinstance(namespaces, dict):
+                namespaces = namespaces.items()
+            if namespaces:
+                ns = []
+                for prefix, ns_uri in namespaces:
+                    if prefix is None or not prefix:
+                        raise TypeError, \
+                            "empty namespace prefix is not supported in XPath"
+                    if ns_uri is None or not ns_uri:
+                        raise TypeError, \
+                            "setting default namespace is not supported in XPath"
+                    prefix_utf = self._to_utf(prefix)
+                    ns_uri_utf = self._to_utf(ns_uri)
+                    ns.append( (prefix_utf, ns_uri_utf) )
+                namespaces = ns
+            else:
+                namespaces = None
+
+        self._doc        = None
+        self._exc        = _ExceptionContext()
+        self._extensions = extensions
+        self._namespaces = namespaces
+        self._temp_refs  = _TempStore()
+        self._temp_documents  = set()
+        self._build_smart_strings = build_smart_strings
+
+        if enable_regexp:
+            _regexp = _ExsltRegExp()
+            _regexp._register_in_context(self)
+
+    cdef _BaseContext _copy(self):
+        cdef _BaseContext context
+        if self._namespaces is not None:
+            namespaces = self._namespaces[:]
+        else:
+            namespaces = None
+        context = self.__class__(namespaces, None, self._error_log, False,
+                                 self._build_smart_strings)
+        if self._extensions is not None:
+            context._extensions = self._extensions.copy()
+        return context
+
+    cdef bytes _to_utf(self, s):
+        "Convert to UTF-8 and keep a reference to the encoded string"
+        cdef python.PyObject* dict_result
+        if s is None:
+            return None
+        dict_result = python.PyDict_GetItem(self._utf_refs, s)
+        if dict_result is not NULL:
+            return <bytes>dict_result
+        utf = _utf8(s)
+        self._utf_refs[s] = utf
+        if python.IS_PYPY:
+            # use C level refs, PyPy refs are not enough!
+            python.Py_INCREF(utf)
+        return utf
+
+    cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt) noexcept:
+        self._xpathCtxt = xpathCtxt
+        xpathCtxt.userData = <void*>self
+        # Need a cast here because older libxml2 releases do not use 'const' in the functype.
+        xpathCtxt.error = <xmlerror.xmlStructuredErrorFunc> _receiveXPathError
+
+    @cython.final
+    cdef _register_context(self, _Document doc):
+        self._doc = doc
+        self._exc.clear()
+
+    @cython.final
+    cdef _cleanup_context(self):
+        #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt)
+        #self.unregisterGlobalNamespaces()
+        if python.IS_PYPY:
+            # clean up double refs in PyPy (see "_to_utf()" method)
+            for ref in self._utf_refs.itervalues():
+                python.Py_DECREF(ref)
+        self._utf_refs.clear()
+        self._eval_context_dict = None
+        self._doc = None
+
+    @cython.final
+    cdef _release_context(self):
+        if self._xpathCtxt is not NULL:
+            self._xpathCtxt.userData = NULL
+            self._xpathCtxt = NULL
+
+    # namespaces (internal UTF-8 methods with leading '_')
+
+    cdef addNamespace(self, prefix, ns_uri):
+        cdef list namespaces
+        if prefix is None:
+            raise TypeError, "empty prefix is not supported in XPath"
+        prefix_utf = self._to_utf(prefix)
+        ns_uri_utf = self._to_utf(ns_uri)
+        new_item = (prefix_utf, ns_uri_utf)
+        if self._namespaces is None:
+            self._namespaces = [new_item]
+        else:
+            namespaces = []
+            for item in self._namespaces:
+                if item[0] == prefix_utf:
+                    item = new_item
+                    new_item = None
+                namespaces.append(item)
+            if new_item is not None:
+                namespaces.append(new_item)
+            self._namespaces = namespaces
+        if self._xpathCtxt is not NULL:
+            xpath.xmlXPathRegisterNs(
+                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))
+
+    cdef registerNamespace(self, prefix, ns_uri):
+        if prefix is None:
+            raise TypeError, "empty prefix is not supported in XPath"
+        prefix_utf = self._to_utf(prefix)
+        ns_uri_utf = self._to_utf(ns_uri)
+        self._global_namespaces.append(prefix_utf)
+        xpath.xmlXPathRegisterNs(self._xpathCtxt,
+                                 _xcstr(prefix_utf), _xcstr(ns_uri_utf))
+
+    cdef registerLocalNamespaces(self):
+        if self._namespaces is None:
+            return
+        for prefix_utf, ns_uri_utf in self._namespaces:
+            xpath.xmlXPathRegisterNs(
+                self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))
+
+    cdef registerGlobalNamespaces(self):
+        cdef list ns_prefixes = _find_all_extension_prefixes()
+        if python.PyList_GET_SIZE(ns_prefixes) > 0:
+            for prefix_utf, ns_uri_utf in ns_prefixes:
+                self._global_namespaces.append(prefix_utf)
+                xpath.xmlXPathRegisterNs(
+                    self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf))
+
+    cdef unregisterGlobalNamespaces(self):
+        if python.PyList_GET_SIZE(self._global_namespaces) > 0:
+            for prefix_utf in self._global_namespaces:
+                xpath.xmlXPathRegisterNs(self._xpathCtxt,
+                                         _xcstr(prefix_utf), NULL)
+            del self._global_namespaces[:]
+    
+    cdef void _unregisterNamespace(self, prefix_utf) noexcept:
+        xpath.xmlXPathRegisterNs(self._xpathCtxt,
+                                 _xcstr(prefix_utf), NULL)
+    
+    # extension functions
+
+    cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1:
+        if self._extensions is None:
+            self._extensions = {}
+        self._extensions[(ns_utf, name_utf)] = function
+        return 0
+
+    cdef registerGlobalFunctions(self, void* ctxt,
+                                 _register_function reg_func):
+        cdef python.PyObject* dict_result
+        cdef dict d
+        for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems():
+            dict_result = python.PyDict_GetItem(
+                self._function_cache, ns_utf)
+            if dict_result is not NULL:
+                d = <dict>dict_result
+            else:
+                d = {}
+                self._function_cache[ns_utf] = d
+            for name_utf, function in ns_functions.iteritems():
+                d[name_utf] = function
+                reg_func(ctxt, name_utf, ns_utf)
+
+    cdef registerLocalFunctions(self, void* ctxt,
+                                _register_function reg_func):
+        cdef python.PyObject* dict_result
+        cdef dict d
+        if self._extensions is None:
+            return # done
+        last_ns = None
+        d = None
+        for (ns_utf, name_utf), function in self._extensions.iteritems():
+            if ns_utf is not last_ns or d is None:
+                last_ns = ns_utf
+                dict_result = python.PyDict_GetItem(
+                    self._function_cache, ns_utf)
+                if dict_result is not NULL:
+                    d = <dict>dict_result
+                else:
+                    d = {}
+                    self._function_cache[ns_utf] = d
+            d[name_utf] = function
+            reg_func(ctxt, name_utf, ns_utf)
+
+    cdef unregisterAllFunctions(self, void* ctxt,
+                                      _register_function unreg_func):
+        for ns_utf, functions in self._function_cache.iteritems():
+            for name_utf in functions:
+                unreg_func(ctxt, name_utf, ns_utf)
+
+    cdef unregisterGlobalFunctions(self, void* ctxt,
+                                         _register_function unreg_func):
+        for ns_utf, functions in self._function_cache.items():
+            for name_utf in functions:
+                if self._extensions is None or \
+                       (ns_utf, name_utf) not in self._extensions:
+                    unreg_func(ctxt, name_utf, ns_utf)
+
+    @cython.final
+    cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name):
+        """Lookup an extension function in the cache and return it.
+
+        Parameters: c_ns_uri may be NULL, c_name must not be NULL
+        """
+        cdef python.PyObject* c_dict
+        cdef python.PyObject* dict_result
+        c_dict = python.PyDict_GetItem(
+            self._function_cache, None if c_ns_uri is NULL else c_ns_uri)
+        if c_dict is not NULL:
+            dict_result = python.PyDict_GetItem(
+                <object>c_dict, <unsigned char*>c_name)
+            if dict_result is not NULL:
+                return <object>dict_result
+        return None
+
+    # Python access to the XPath context for extension functions
+
+    @property
+    def context_node(self):
+        cdef xmlNode* c_node
+        if self._xpathCtxt is NULL:
+            raise XPathError, \
+                "XPath context is only usable during the evaluation"
+        c_node = self._xpathCtxt.node
+        if c_node is NULL:
+            raise XPathError, "no context node"
+        if c_node.doc != self._xpathCtxt.doc:
+            raise XPathError, \
+                "document-external context nodes are not supported"
+        if self._doc is None:
+            raise XPathError, "document context is missing"
+        return _elementFactory(self._doc, c_node)
+
+    @property
+    def eval_context(self):
+        if self._eval_context_dict is None:
+            self._eval_context_dict = {}
+        return self._eval_context_dict
+
+    # Python reference keeping during XPath function evaluation
+
+    @cython.final
+    cdef _release_temp_refs(self):
+        "Free temporarily referenced objects from this context."
+        self._temp_refs.clear()
+        self._temp_documents.clear()
+
+    @cython.final
+    cdef _hold(self, obj):
+        """A way to temporarily hold references to nodes in the evaluator.
+
+        This is needed because otherwise nodes created in XPath extension
+        functions would be reference counted too soon, during the XPath
+        evaluation.  This is most important in the case of exceptions.
+        """
+        cdef _Element element
+        if isinstance(obj, _Element):
+            self._temp_refs.add(obj)
+            self._temp_documents.add((<_Element>obj)._doc)
+            return
+        elif _isString(obj) or not python.PySequence_Check(obj):
+            return
+        for o in obj:
+            if isinstance(o, _Element):
+                #print "Holding element:", <int>element._c_node
+                self._temp_refs.add(o)
+                #print "Holding document:", <int>element._doc._c_doc
+                self._temp_documents.add((<_Element>o)._doc)
+
+    @cython.final
+    cdef _Document _findDocumentForNode(self, xmlNode* c_node):
+        """If an XPath expression returns an element from a different
+        document than the current context document, we call this to
+        see if it was possibly created by an extension and is a known
+        document instance.
+        """
+        cdef _Document doc
+        for doc in self._temp_documents:
+            if doc is not None and doc._c_doc is c_node.doc:
+                return doc
+        return None
+
+
+# libxml2 keeps these error messages in a static array in its code
+# and doesn't give us access to them ...
+
+cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = (
+    b"Ok",
+    b"Number encoding",
+    b"Unfinished literal",
+    b"Start of literal",
+    b"Expected $ for variable reference",
+    b"Undefined variable",
+    b"Invalid predicate",
+    b"Invalid expression",
+    b"Missing closing curly brace",
+    b"Unregistered function",
+    b"Invalid operand",
+    b"Invalid type",
+    b"Invalid number of arguments",
+    b"Invalid context size",
+    b"Invalid context position",
+    b"Memory allocation error",
+    b"Syntax error",
+    b"Resource error",
+    b"Sub resource error",
+    b"Undefined namespace prefix",
+    b"Encoding error",
+    b"Char out of XML range",
+    b"Invalid or incomplete context",
+    b"Stack usage error",
+    b"Forbidden variable\n",
+    b"?? Unknown error ??\n",
+)
+
+cdef void _forwardXPathError(void* c_ctxt, const xmlerror.xmlError* c_error) noexcept with gil:
+    cdef xmlerror.xmlError error
+    cdef int xpath_code
+    if c_error.message is not NULL:
+        error.message = c_error.message
+    else:
+        xpath_code = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK
+        if 0 <= xpath_code < len(LIBXML2_XPATH_ERROR_MESSAGES):
+            error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[xpath_code])
+        else:
+            error.message = b"unknown error"
+    error.domain = c_error.domain
+    error.code = c_error.code
+    error.level = c_error.level
+    error.line = c_error.line
+    error.int2 = c_error.int1 # column
+    error.file = c_error.file
+    error.node = NULL
+
+    (<_BaseContext>c_ctxt)._error_log._receive(&error)
+
+cdef void _receiveXPathError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
+    if not __DEBUG:
+        return
+    if c_context is NULL:
+        _forwardError(NULL, error)
+    else:
+        _forwardXPathError(c_context, error)
+
+
+def Extension(module, function_mapping=None, *, ns=None):
+    """Extension(module, function_mapping=None, ns=None)
+
+    Build a dictionary of extension functions from the functions
+    defined in a module or the methods of an object.
+
+    As second argument, you can pass an additional mapping of
+    attribute names to XPath function names, or a list of function
+    names that should be taken.
+
+    The ``ns`` keyword argument accepts a namespace URI for the XPath
+    functions.
+    """
+    cdef dict functions = {}
+    if isinstance(function_mapping, dict):
+        for function_name, xpath_name in function_mapping.items():
+            functions[(ns, xpath_name)] = getattr(module, function_name)
+    else:
+        if function_mapping is None:
+            function_mapping = [ name for name in dir(module)
+                                 if not name.startswith('_') ]
+        for function_name in function_mapping:
+            functions[(ns, function_name)] = getattr(module, function_name)
+    return functions
+
+################################################################################
+# EXSLT regexp implementation
+
+@cython.final
+@cython.internal
+cdef class _ExsltRegExp:
+    cdef dict _compile_map
+    def __cinit__(self):
+        self._compile_map = {}
+
+    cdef _make_string(self, value):
+        if _isString(value):
+            return value
+        elif isinstance(value, list):
+            # node set: take recursive text concatenation of first element
+            if python.PyList_GET_SIZE(value) == 0:
+                return ''
+            firstnode = value[0]
+            if _isString(firstnode):
+                return firstnode
+            elif isinstance(firstnode, _Element):
+                c_text = tree.xmlNodeGetContent((<_Element>firstnode)._c_node)
+                if c_text is NULL:
+                    raise MemoryError()
+                try:
+                    return funicode(c_text)
+                finally:
+                    tree.xmlFree(c_text)
+            else:
+                return unicode(firstnode)
+        else:
+            return unicode(value)
+
+    cdef _compile(self, rexp, ignore_case):
+        cdef python.PyObject* c_result
+        rexp = self._make_string(rexp)
+        key = (rexp, ignore_case)
+        c_result = python.PyDict_GetItem(self._compile_map, key)
+        if c_result is not NULL:
+            return <object>c_result
+        py_flags = re.UNICODE
+        if ignore_case:
+            py_flags = py_flags | re.IGNORECASE
+        rexp_compiled = re.compile(rexp, py_flags)
+        self._compile_map[key] = rexp_compiled
+        return rexp_compiled
+
+    def test(self, ctxt, s, rexp, flags=''):
+        flags = self._make_string(flags)
+        s = self._make_string(s)
+        rexpc = self._compile(rexp, 'i' in flags)
+        if rexpc.search(s) is None:
+            return False
+        else:
+            return True
+
+    def match(self, ctxt, s, rexp, flags=''):
+        cdef list result_list
+        flags = self._make_string(flags)
+        s = self._make_string(s)
+        rexpc = self._compile(rexp, 'i' in flags)
+        if 'g' in flags:
+            results = rexpc.findall(s)
+            if not results:
+                return ()
+        else:
+            result = rexpc.search(s)
+            if not result:
+                return ()
+            results = [ result.group() ]
+            results.extend( result.groups('') )
+        result_list = []
+        root = Element('matches')
+        for s_match in results:
+            if python.PyTuple_CheckExact(s_match):
+                s_match = ''.join(s_match)
+            elem = SubElement(root, 'match')
+            elem.text = s_match
+            result_list.append(elem)
+        return result_list
+
+    def replace(self, ctxt, s, rexp, flags, replacement):
+        replacement = self._make_string(replacement)
+        flags = self._make_string(flags)
+        s = self._make_string(s)
+        rexpc = self._compile(rexp, 'i' in flags)
+        count: object = 0 if 'g' in flags else 1
+        return rexpc.sub(replacement, s, count)
+
+    cdef _register_in_context(self, _BaseContext context):
+        ns = b"http://exslt.org/regular-expressions"
+        context._addLocalExtensionFunction(ns, b"test",    self.test)
+        context._addLocalExtensionFunction(ns, b"match",   self.match)
+        context._addLocalExtensionFunction(ns, b"replace", self.replace)
+
+
+################################################################################
+# helper functions
+
+cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc,
+                                            _BaseContext context) except NULL:
+    cdef xpath.xmlNodeSet* resultSet
+    cdef _Element fake_node = None
+    cdef xmlNode* c_node
+
+    if isinstance(obj, unicode):
+        obj = _utf8(obj)
+    if isinstance(obj, bytes):
+        # libxml2 copies the string value
+        return xpath.xmlXPathNewCString(_cstr(obj))
+    if isinstance(obj, bool):
+        return xpath.xmlXPathNewBoolean(obj)
+    if python.PyNumber_Check(obj):
+        return xpath.xmlXPathNewFloat(obj)
+    if obj is None:
+        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
+    elif isinstance(obj, _Element):
+        resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node)
+    elif python.PySequence_Check(obj):
+        resultSet = xpath.xmlXPathNodeSetCreate(NULL)
+        try:
+            for value in obj:
+                if isinstance(value, _Element):
+                    if context is not None:
+                        context._hold(value)
+                    xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node)
+                else:
+                    if context is None or doc is None:
+                        raise XPathResultError, \
+                              f"Non-Element values not supported at this point - got {value!r}"
+                    # support strings by appending text nodes to an Element
+                    if isinstance(value, unicode):
+                        value = _utf8(value)
+                    if isinstance(value, bytes):
+                        if fake_node is None:
+                            fake_node = _makeElement("text-root", NULL, doc, None,
+                                                     None, None, None, None, None)
+                            context._hold(fake_node)
+                        else:
+                            # append a comment node to keep the text nodes separate
+                            c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"")
+                            if c_node is NULL:
+                                raise MemoryError()
+                            tree.xmlAddChild(fake_node._c_node, c_node)
+                        context._hold(value)
+                        c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value))
+                        if c_node is NULL:
+                            raise MemoryError()
+                        tree.xmlAddChild(fake_node._c_node, c_node)
+                        xpath.xmlXPathNodeSetAdd(resultSet, c_node)
+                    else:
+                        raise XPathResultError, \
+                              f"This is not a supported node-set result: {value!r}"
+        except:
+            xpath.xmlXPathFreeNodeSet(resultSet)
+            raise
+    else:
+        raise XPathResultError, f"Unknown return type: {python._fqtypename(obj).decode('utf8')}"
+    return xpath.xmlXPathWrapNodeSet(resultSet)
+
+cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj,
+                               _Document doc, _BaseContext context):
+    if xpathObj.type == xpath.XPATH_UNDEFINED:
+        raise XPathResultError, "Undefined xpath result"
+    elif xpathObj.type == xpath.XPATH_NODESET:
+        return _createNodeSetResult(xpathObj, doc, context)
+    elif xpathObj.type == xpath.XPATH_BOOLEAN:
+        return xpathObj.boolval
+    elif xpathObj.type == xpath.XPATH_NUMBER:
+        return xpathObj.floatval
+    elif xpathObj.type == xpath.XPATH_STRING:
+        stringval = funicode(xpathObj.stringval)
+        if context._build_smart_strings:
+            stringval = _elementStringResultFactory(
+                stringval, None, None, False)
+        return stringval
+    elif xpathObj.type == xpath.XPATH_POINT:
+        raise NotImplementedError, "XPATH_POINT"
+    elif xpathObj.type == xpath.XPATH_RANGE:
+        raise NotImplementedError, "XPATH_RANGE"
+    elif xpathObj.type == xpath.XPATH_LOCATIONSET:
+        raise NotImplementedError, "XPATH_LOCATIONSET"
+    elif xpathObj.type == xpath.XPATH_USERS:
+        raise NotImplementedError, "XPATH_USERS"
+    elif xpathObj.type == xpath.XPATH_XSLT_TREE:
+        return _createNodeSetResult(xpathObj, doc, context)
+    else:
+        raise XPathResultError, f"Unknown xpath result {xpathObj.type}"
+
+cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc,
+                                 _BaseContext context):
+    cdef xmlNode* c_node
+    cdef int i
+    cdef list result
+    result = []
+    if xpathObj.nodesetval is NULL:
+        return result
+    for i in range(xpathObj.nodesetval.nodeNr):
+        c_node = xpathObj.nodesetval.nodeTab[i]
+        _unpackNodeSetEntry(result, c_node, doc, context,
+                            xpathObj.type == xpath.XPATH_XSLT_TREE)
+    return result
+
+cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc,
+                         _BaseContext context, bint is_fragment):
+    cdef xmlNode* c_child
+    if _isElement(c_node):
+        if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
+            # XXX: works, but maybe not always the right thing to do?
+            # XPath: only runs when extensions create or copy trees
+            #        -> we store Python refs to these, so that is OK
+            # XSLT: can it leak when merging trees from multiple sources?
+            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+            # FIXME: call _instantiateElementFromXPath() instead?
+        results.append(
+            _fakeDocElementFactory(doc, c_node))
+    elif c_node.type == tree.XML_TEXT_NODE or \
+             c_node.type == tree.XML_CDATA_SECTION_NODE or \
+             c_node.type == tree.XML_ATTRIBUTE_NODE:
+        results.append(
+            _buildElementStringResult(doc, c_node, context))
+    elif c_node.type == tree.XML_NAMESPACE_DECL:
+        results.append( (funicodeOrNone((<xmlNs*>c_node).prefix),
+                         funicodeOrNone((<xmlNs*>c_node).href)) )
+    elif c_node.type == tree.XML_DOCUMENT_NODE or \
+            c_node.type == tree.XML_HTML_DOCUMENT_NODE:
+        # ignored for everything but result tree fragments
+        if is_fragment:
+            c_child = c_node.children
+            while c_child is not NULL:
+                _unpackNodeSetEntry(results, c_child, doc, context, 0)
+                c_child = c_child.next
+    elif c_node.type == tree.XML_XINCLUDE_START or \
+            c_node.type == tree.XML_XINCLUDE_END:
+        pass
+    else:
+        raise NotImplementedError, \
+            f"Not yet implemented result node type: {c_node.type}"
+
+cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj) noexcept:
+    """Free the XPath object, but *never* free the *content* of node sets.
+    Python dealloc will do that for us.
+    """
+    if xpathObj.nodesetval is not NULL:
+        xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval)
+        xpathObj.nodesetval = NULL
+    xpath.xmlXPathFreeObject(xpathObj)
+
+cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc,
+                                           _BaseContext context):
+    # NOTE: this may copy the element - only call this when it can't leak
+    if c_node.doc != doc._c_doc and c_node.doc._private is NULL:
+        # not from the context document and not from a fake document
+        # either => may still be from a known document, e.g. one
+        # created by an extension function
+        node_doc = context._findDocumentForNode(c_node)
+        if node_doc is None:
+            # not from a known document at all! => can only make a
+            # safety copy here
+            c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1)
+        else:
+            doc = node_doc
+    return _fakeDocElementFactory(doc, c_node)
+
+################################################################################
+# special str/unicode subclasses
+
+@cython.final
+cdef class _ElementUnicodeResult(unicode):
+    cdef _Element _parent
+    cdef readonly object attrname
+    cdef readonly bint is_tail
+
+    def getparent(self):
+        return self._parent
+
+    @property
+    def is_text(self):
+        return self._parent is not None and not (self.is_tail or self.attrname is not None)
+
+    @property
+    def is_attribute(self):
+        return self.attrname is not None
+
+cdef object _elementStringResultFactory(string_value, _Element parent,
+                                        attrname, bint is_tail):
+    result = _ElementUnicodeResult(string_value)
+    result._parent = parent
+    result.is_tail = is_tail
+    result.attrname = attrname
+    return result
+
+cdef object _buildElementStringResult(_Document doc, xmlNode* c_node,
+                                      _BaseContext context):
+    cdef _Element parent = None
+    cdef object attrname = None
+    cdef xmlNode* c_element
+    cdef bint is_tail
+
+    if c_node.type == tree.XML_ATTRIBUTE_NODE:
+        attrname = _namespacedName(c_node)
+        is_tail = 0
+        s = tree.xmlNodeGetContent(c_node)
+        try:
+            value = funicode(s)
+        finally:
+            tree.xmlFree(s)
+        c_element = NULL
+    else:
+        #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type"
+        # may be tail text or normal text
+        value = funicode(c_node.content)
+        c_element = _previousElement(c_node)
+        is_tail = c_element is not NULL
+
+    if not context._build_smart_strings:
+        return value
+
+    if c_element is NULL:
+        # non-tail text or attribute text
+        c_element = c_node.parent
+        while c_element is not NULL and not _isElement(c_element):
+            c_element = c_element.parent
+
+    if c_element is not NULL:
+        parent = _instantiateElementFromXPath(c_element, doc, context)
+
+    return _elementStringResultFactory(
+        value, parent, attrname, is_tail)
+
+################################################################################
+# callbacks for XPath/XSLT extension functions
+
+cdef void _extension_function_call(_BaseContext context, function,
+                                   xpath.xmlXPathParserContext* ctxt, int nargs) noexcept:
+    cdef _Document doc
+    cdef xpath.xmlXPathObject* obj
+    cdef list args
+    cdef int i
+    doc = context._doc
+    try:
+        args = []
+        for i in range(nargs):
+            obj = xpath.valuePop(ctxt)
+            o = _unwrapXPathObject(obj, doc, context)
+            _freeXPathObject(obj)
+            args.append(o)
+        args.reverse()
+
+        res = function(context, *args)
+        # wrap result for XPath consumption
+        obj = _wrapXPathObject(res, doc, context)
+        # prevent Python from deallocating elements handed to libxml2
+        context._hold(res)
+        xpath.valuePush(ctxt, obj)
+    except:
+        xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR)
+        context._exc._store_raised()
+    finally:
+        return  # swallow any further exceptions
+
+# lookup the function by name and call it
+
+cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt,
+                               int nargs) noexcept with gil:
+    cdef _BaseContext context
+    cdef xpath.xmlXPathContext* rctxt = ctxt.context
+    context = <_BaseContext> rctxt.userData
+    try:
+        function = context._find_cached_function(rctxt.functionURI, rctxt.function)
+        if function is not None:
+            _extension_function_call(context, function, ctxt, nargs)
+        else:
+            xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
+            context._exc._store_exception(XPathFunctionError(
+                f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found"))
+    except:
+        # may not be the right error, but we need to tell libxml2 *something*
+        xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR)
+        context._exc._store_raised()
+    finally:
+        return  # swallow any further exceptions
author	S. Solomon Darnell	2025-03-28 21:52:21 -0500
committer	S. Solomon Darnell	2025-03-28 21:52:21 -0500
commit	4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree	ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/extensions.pxi
parent	cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download	gn-ai-master.tar.gz