aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/lxml/xpath.pxi
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/xpath.pxi')
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/xpath.pxi487
1 files changed, 487 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/xpath.pxi b/.venv/lib/python3.12/site-packages/lxml/xpath.pxi
new file mode 100644
index 00000000..352f6313
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/xpath.pxi
@@ -0,0 +1,487 @@
+# XPath evaluation
+
+class XPathSyntaxError(LxmlSyntaxError, XPathError):
+    """Error raised when an XPath expression cannot be compiled."""
+
+################################################################################
+# XPath
+
+# libxml2 XPath error codes treated as syntax problems.  The evaluators in
+# this file pass this tuple to ``_ErrorLog.filter_types()`` to select the log
+# entries used for building the exception message.
+cdef object _XPATH_SYNTAX_ERRORS = (
+ xmlerror.XML_XPATH_NUMBER_ERROR,
+ xmlerror.XML_XPATH_UNFINISHED_LITERAL_ERROR,
+ xmlerror.XML_XPATH_VARIABLE_REF_ERROR,
+ xmlerror.XML_XPATH_INVALID_PREDICATE_ERROR,
+ xmlerror.XML_XPATH_UNCLOSED_ERROR,
+ xmlerror.XML_XPATH_INVALID_CHAR_ERROR
+)
+
+# libxml2 XPath error codes treated as evaluation-time problems.  These are
+# looked up first when an evaluation error message is built; syntax errors
+# are only consulted when none of these were logged.
+cdef object _XPATH_EVAL_ERRORS = (
+ xmlerror.XML_XPATH_UNDEF_VARIABLE_ERROR,
+ xmlerror.XML_XPATH_UNDEF_PREFIX_ERROR,
+ xmlerror.XML_XPATH_UNKNOWN_FUNC_ERROR,
+ xmlerror.XML_XPATH_INVALID_OPERAND,
+ xmlerror.XML_XPATH_INVALID_TYPE,
+ xmlerror.XML_XPATH_INVALID_ARITY,
+ xmlerror.XML_XPATH_INVALID_CTXT_SIZE,
+ xmlerror.XML_XPATH_INVALID_CTXT_POSITION
+)
+
+cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
+    # Bind the generic ``_xpath_function_call`` dispatcher to a function name
+    # in the given XPath context.  ``ns_utf`` selects the namespaced variant
+    # of the libxml2 registration call; ``None`` means "no namespace".
+    # Returns the status code of the libxml2 call unchanged.
+    cdef xpath.xmlXPathContext* c_ctxt = <xpath.xmlXPathContext*>ctxt
+    if ns_utf is not None:
+        return xpath.xmlXPathRegisterFuncNS(
+            c_ctxt, _xcstr(name_utf), _xcstr(ns_utf), _xpath_function_call)
+    return xpath.xmlXPathRegisterFunc(
+        c_ctxt, _xcstr(name_utf), _xpath_function_call)
+
+cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
+    # Remove a function binding from the given XPath context by registering
+    # NULL as its implementation.  ``ns_utf`` selects the namespaced variant
+    # of the libxml2 call; ``None`` means "no namespace".
+    # Returns the status code of the libxml2 call unchanged.
+    cdef xpath.xmlXPathContext* c_ctxt = <xpath.xmlXPathContext*>ctxt
+    if ns_utf is not None:
+        return xpath.xmlXPathRegisterFuncNS(
+            c_ctxt, _xcstr(name_utf), _xcstr(ns_utf), NULL)
+    return xpath.xmlXPathRegisterFunc(c_ctxt, _xcstr(name_utf), NULL)
+
+
+@cython.final
+@cython.internal
+cdef class _XPathContext(_BaseContext):
+    """Context object that wires namespaces, extension functions and
+    XPath variables into a libxml2 XPath context.
+    """
+    cdef object _variables
+
+    def __init__(self, namespaces, extensions, error_log, enable_regexp, variables,
+                 build_smart_strings):
+        self._variables = variables
+        _BaseContext.__init__(
+            self, namespaces, extensions, error_log, enable_regexp,
+            build_smart_strings)
+
+    cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
+        # Attach the libxml2 context, then register everything that is local
+        # to this evaluator.
+        self._set_xpath_context(xpathCtxt)
+        # This would be a good place to set up the XPath parser dict, but
+        # we cannot use the current thread dict as we do not know which
+        # thread will execute the XPath evaluator - so, no dict for now.
+        self.registerLocalNamespaces()
+        self.registerLocalFunctions(xpathCtxt, _register_xpath_function)
+
+    cdef register_context(self, _Document doc):
+        # Per-evaluation setup: global namespaces/functions, EXSLT functions
+        # and the variables that were passed to the constructor (if any).
+        self._register_context(doc)
+        self.registerGlobalNamespaces()
+        self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
+        self.registerExsltFunctions()
+        if self._variables is not None:
+            self.registerVariables(self._variables)
+
+    cdef unregister_context(self):
+        # Per-evaluation teardown, undoing register_context() and dropping
+        # all registered XPath variables.
+        self.unregisterGlobalFunctions(
+            self._xpathCtxt, _unregister_xpath_function)
+        self.unregisterGlobalNamespaces()
+        xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
+        self._cleanup_context()
+
+    cdef void registerExsltFunctions(self) noexcept:
+        # With an older libxslt we'd only execute dummy functions anyway,
+        # so registration is skipped entirely in that case.
+        if xslt.LIBXSLT_VERSION >= 10125:
+            tree.xmlHashScan(
+                self._xpathCtxt.nsHash, _registerExsltFunctionsForNamespaces,
+                self._xpathCtxt)
+
+    cdef registerVariables(self, variable_dict):
+        # Register every name/value pair of the mapping as an XPath variable.
+        for var_name, var_value in variable_dict.items():
+            self.registerVariable(var_name, var_value)
+
+    cdef registerVariable(self, name, value):
+        # Register a single XPath variable under the UTF-8 encoded name.
+        name_utf = self._to_utf(name)
+        xpath.xmlXPathRegisterVariable(
+            self._xpathCtxt,
+            _xcstr(name_utf),
+            _wrapXPathObject(value, None, None))
+
+
+cdef void _registerExsltFunctionsForNamespaces(
+        void* _c_href, void* _ctxt, const_xmlChar* c_prefix) noexcept:
+    # Callback for xmlHashScan() over the context's namespace hash: when the
+    # namespace URI is one of the EXSLT date/sets/math/strings namespaces,
+    # register the matching EXSLT function set under the given prefix.
+    cdef const_xmlChar* href = <const_xmlChar*> _c_href
+    cdef xpath.xmlXPathContext* xpath_ctxt = <xpath.xmlXPathContext*> _ctxt
+
+    if tree.xmlStrcmp(href, xslt.EXSLT_DATE_NAMESPACE) == 0:
+        xslt.exsltDateXpathCtxtRegister(xpath_ctxt, c_prefix)
+        return
+    if tree.xmlStrcmp(href, xslt.EXSLT_SETS_NAMESPACE) == 0:
+        xslt.exsltSetsXpathCtxtRegister(xpath_ctxt, c_prefix)
+        return
+    if tree.xmlStrcmp(href, xslt.EXSLT_MATH_NAMESPACE) == 0:
+        xslt.exsltMathXpathCtxtRegister(xpath_ctxt, c_prefix)
+        return
+    if tree.xmlStrcmp(href, xslt.EXSLT_STRINGS_NAMESPACE) == 0:
+        xslt.exsltStrXpathCtxtRegister(xpath_ctxt, c_prefix)
+
+
+# Common base of the XPath evaluator classes in this file.  It owns the
+# libxml2 XPath context pointer, the _XPathContext wrapper, an evaluation
+# lock (only when threading is enabled) and the error log used to build
+# exception messages.
+cdef class _XPathEvaluatorBase:
+ cdef xpath.xmlXPathContext* _xpathCtxt
+ cdef _XPathContext _context
+ cdef python.PyThread_type_lock _eval_lock
+ cdef _ErrorLog _error_log
+ # Allocates the evaluation lock (threading builds only) and the error log.
+ # Raises MemoryError if the lock cannot be allocated.
+ def __cinit__(self):
+ self._xpathCtxt = NULL
+ if config.ENABLE_THREADING:
+ self._eval_lock = python.PyThread_allocate_lock()
+ if self._eval_lock is NULL:
+ raise MemoryError()
+ self._error_log = _ErrorLog()
+
+ # Creates the _XPathContext; no initial variables are passed (None).
+ def __init__(self, namespaces, extensions, enable_regexp,
+ smart_strings):
+ self._context = _XPathContext(namespaces, extensions, self._error_log,
+ enable_regexp, None, smart_strings)
+
+ # Returns a copy of the error log, not the live log object.
+ @property
+ def error_log(self):
+ assert self._error_log is not None, "XPath evaluator not initialised"
+ return self._error_log.copy()
+
+ # Frees the libxml2 XPath context and the evaluation lock, if present.
+ def __dealloc__(self):
+ if self._xpathCtxt is not NULL:
+ xpath.xmlXPathFreeContext(self._xpathCtxt)
+ if config.ENABLE_THREADING:
+ if self._eval_lock is not NULL:
+ python.PyThread_free_lock(self._eval_lock)
+
+ # Stores the context pointer and hands it on to the _XPathContext.
+ cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
+ self._xpathCtxt = xpathCtxt
+ self._context.set_context(xpathCtxt)
+
+ # Returns true if the first character after leading spaces/tabs is '/'.
+ # A NULL path is reported as not absolute.
+ cdef bint _checkAbsolutePath(self, char* path) noexcept:
+ cdef char c
+ if path is NULL:
+ return 0
+ c = path[0]
+ while c == c' ' or c == c'\t':
+ path = path + 1
+ c = path[0]
+ return c == c'/'
+
+ # Acquires the evaluation lock (blocking, with the GIL released).  Does
+ # nothing without threading support or without a lock.  Raises XPathError
+ # if the acquire call reports failure; returns 0 otherwise.
+ @cython.final
+ cdef int _lock(self) except -1:
+ cdef int result
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ with nogil:
+ result = python.PyThread_acquire_lock(
+ self._eval_lock, python.WAIT_LOCK)
+ if result == 0:
+ raise XPathError, "XPath evaluator locking failed"
+ return 0
+
+ # Releases the evaluation lock taken by _lock(), under the same conditions.
+ @cython.final
+ cdef void _unlock(self) noexcept:
+ if config.ENABLE_THREADING and self._eval_lock != NULL:
+ python.PyThread_release_lock(self._eval_lock)
+
+ # Builds (does not raise) an XPathSyntaxError.  The message is taken from
+ # the logged syntax errors when available, otherwise from the whole log
+ # with a generic default text.
+ cdef _build_parse_error(self):
+ cdef _BaseErrorLog entries
+ entries = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
+ if entries:
+ message = entries._buildExceptionMessage(None)
+ if message is not None:
+ return XPathSyntaxError(message, self._error_log)
+ return XPathSyntaxError(
+ self._error_log._buildExceptionMessage("Error in xpath expression"),
+ self._error_log)
+
+ # Builds (does not raise) an XPathEvalError.  Evaluation errors in the log
+ # take precedence; logged syntax errors are the fallback, and the whole
+ # log with a generic default text is the last resort.
+ cdef _build_eval_error(self):
+ cdef _BaseErrorLog entries
+ entries = self._error_log.filter_types(_XPATH_EVAL_ERRORS)
+ if not entries:
+ entries = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
+ if entries:
+ message = entries._buildExceptionMessage(None)
+ if message is not None:
+ return XPathEvalError(message, self._error_log)
+ return XPathEvalError(
+ self._error_log._buildExceptionMessage("Error in xpath expression"),
+ self._error_log)
+
+ # Converts the raw libxml2 result into a Python object and always frees
+ # the C result object and the context's temporary references.
+ # Order matters: an exception stored on the context is re-raised first
+ # (presumably set by an extension function - confirm in _BaseContext),
+ # then a NULL result is turned into an XPathEvalError.
+ cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc):
+ if self._context._exc._has_raised():
+ if xpathObj is not NULL:
+ _freeXPathObject(xpathObj)
+ xpathObj = NULL
+ self._context._release_temp_refs()
+ self._context._exc._raise_if_stored()
+
+ if xpathObj is NULL:
+ self._context._release_temp_refs()
+ raise self._build_eval_error()
+
+ try:
+ result = _unwrapXPathObject(xpathObj, doc, self._context)
+ finally:
+ _freeXPathObject(xpathObj)
+ self._context._release_temp_refs()
+
+ return result
+
+
+cdef class XPathElementEvaluator(_XPathEvaluatorBase):
+ """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True, smart_strings=True)
+ Create an XPath evaluator for an element.
+
+ Absolute XPath expressions (starting with '/') will be evaluated against
+ the ElementTree as returned by getroottree().
+
+ Additional namespace declarations can be passed with the
+ 'namespaces' keyword argument. EXSLT regular expression support
+ can be disabled with the 'regexp' boolean keyword (defaults to
+ True). Smart strings will be returned for string results unless
+ you pass ``smart_strings=False``.
+ """
+ cdef _Element _element
+ def __init__(self, _Element element not None, *, namespaces=None,
+ extensions=None, regexp=True, smart_strings=True):
+ cdef xpath.xmlXPathContext* xpathCtxt
+ cdef int ns_register_status
+ cdef _Document doc
+ _assertValidNode(element)
+ _assertValidDoc(element._doc)
+ self._element = element
+ doc = element._doc
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions,
+ regexp, smart_strings)
+ # The libxml2 context is created for the element's document and is
+ # owned by this evaluator from set_context() on.
+ xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
+ if xpathCtxt is NULL:
+ raise MemoryError()
+ self.set_context(xpathCtxt)
+
+ def register_namespace(self, prefix, uri):
+ """Register a namespace with the XPath context.
+ """
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
+ self._context.addNamespace(prefix, uri)
+
+ def register_namespaces(self, namespaces):
+ """Register a prefix -> uri dict.
+ """
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
+ for prefix, uri in namespaces.items():
+ self._context.addNamespace(prefix, uri)
+
+ def __call__(self, _path, **_variables):
+ """__call__(self, _path, **_variables)
+
+ Evaluate an XPath expression on the document.
+
+ Variables may be provided as keyword arguments. Note that namespaces
+ are currently not supported for variables.
+
+ Absolute XPath expressions (starting with '/') will be evaluated
+ against the ElementTree as returned by getroottree().
+ """
+ cdef xpath.xmlXPathObject* xpathObj
+ cdef _Document doc
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
+ path = _utf8(_path)
+ doc = self._element._doc
+
+ # The lock is held for the whole evaluation; the finally clause below
+ # undoes the context registration and releases the lock again.
+ self._lock()
+ self._xpathCtxt.node = self._element._c_node
+ try:
+ self._context.register_context(doc)
+ self._context.registerVariables(_variables)
+ c_path = _xcstr(path)
+ # The expression is evaluated by libxml2 with the GIL released.
+ with nogil:
+ xpathObj = xpath.xmlXPathEvalExpression(
+ c_path, self._xpathCtxt)
+ result = self._handle_result(xpathObj, doc)
+ finally:
+ self._context.unregister_context()
+ self._unlock()
+
+ return result
+
+
+cdef class XPathDocumentEvaluator(XPathElementEvaluator):
+ """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True, smart_strings=True)
+ Create an XPath evaluator for an ElementTree.
+
+ Additional namespace declarations can be passed with the
+ 'namespaces' keyword argument. EXSLT regular expression support
+ can be disabled with the 'regexp' boolean keyword (defaults to
+ True). Smart strings will be returned for string results unless
+ you pass ``smart_strings=False``.
+ """
+ def __init__(self, _ElementTree etree not None, *, namespaces=None,
+ extensions=None, regexp=True, smart_strings=True):
+ # The tree's context node becomes the evaluator's element.
+ XPathElementEvaluator.__init__(
+ self, etree._context_node, namespaces=namespaces,
+ extensions=extensions, regexp=regexp,
+ smart_strings=smart_strings)
+
+ def __call__(self, _path, **_variables):
+ """__call__(self, _path, **_variables)
+
+ Evaluate an XPath expression on the document.
+
+ Variables may be provided as keyword arguments. Note that namespaces
+ are currently not supported for variables.
+ """
+ cdef xpath.xmlXPathObject* xpathObj
+ cdef xmlDoc* c_doc
+ cdef _Document doc
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
+ path = _utf8(_path)
+ doc = self._element._doc
+
+ self._lock()
+ try:
+ self._context.register_context(doc)
+ # Evaluation runs against the document returned by _fakeRootDoc()
+ # for the context node; it is handed back to _destroyFakeDoc() in
+ # the inner finally clause, before the context is unregistered.
+ c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
+ try:
+ self._context.registerVariables(_variables)
+ c_path = _xcstr(path)
+ # The expression is evaluated by libxml2 with the GIL released.
+ with nogil:
+ self._xpathCtxt.doc = c_doc
+ self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
+ xpathObj = xpath.xmlXPathEvalExpression(
+ c_path, self._xpathCtxt)
+ result = self._handle_result(xpathObj, doc)
+ finally:
+ _destroyFakeDoc(doc._c_doc, c_doc)
+ self._context.unregister_context()
+ finally:
+ self._unlock()
+
+ return result
+
+
+def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None,
+                   regexp=True, smart_strings=True):
+    """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True, smart_strings=True)
+
+    Creates an XPath evaluator for an ElementTree or an Element.
+
+    The resulting object can be called with an XPath expression as argument
+    and XPath variables provided as keyword arguments.
+
+    Additional namespace declarations can be passed with the
+    'namespaces' keyword argument.  EXSLT regular expression support
+    can be disabled with the 'regexp' boolean keyword (defaults to
+    True).  Smart strings will be returned for string results unless
+    you pass ``smart_strings=False``.
+    """
+    # ElementTree objects get the document evaluator, everything else is
+    # handed to the element evaluator (which validates its argument).
+    if isinstance(etree_or_element, _ElementTree):
+        evaluator_class = XPathDocumentEvaluator
+    else:
+        evaluator_class = XPathElementEvaluator
+    return evaluator_class(
+        etree_or_element, namespaces=namespaces, extensions=extensions,
+        regexp=regexp, smart_strings=smart_strings)
+
+
+cdef class XPath(_XPathEvaluatorBase):
+ """XPath(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True)
+ A compiled XPath expression that can be called on Elements and ElementTrees.
+
+ Besides the XPath expression, you can pass prefix-namespace
+ mappings and extension functions to the constructor through the
+ keyword arguments ``namespaces`` and ``extensions``. EXSLT
+ regular expression support can be disabled with the 'regexp'
+ boolean keyword (defaults to True). Smart strings will be
+ returned for string results unless you pass
+ ``smart_strings=False``.
+ """
+ # Compiled libxml2 expression; freed in __dealloc__.
+ cdef xpath.xmlXPathCompExpr* _xpath
+ # UTF-8 encoded source of the expression, as passed to the compiler.
+ cdef bytes _path
+ def __cinit__(self):
+ self._xpath = NULL
+
+ def __init__(self, path, *, namespaces=None, extensions=None,
+ regexp=True, smart_strings=True):
+ cdef xpath.xmlXPathContext* xpathCtxt
+ _XPathEvaluatorBase.__init__(self, namespaces, extensions,
+ regexp, smart_strings)
+ self._path = _utf8(path)
+ # The context is created without a document; the document and node
+ # are set on each call.
+ xpathCtxt = xpath.xmlXPathNewContext(NULL)
+ if xpathCtxt is NULL:
+ raise MemoryError()
+ self.set_context(xpathCtxt)
+ # Compile once; a NULL result is reported as XPathSyntaxError.
+ self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _xcstr(self._path))
+ if self._xpath is NULL:
+ raise self._build_parse_error()
+
+ def __call__(self, _etree_or_element, **_variables):
+ "__call__(self, _etree_or_element, **_variables)"
+ cdef xpath.xmlXPathObject* xpathObj
+ cdef _Document document
+ cdef _Element element
+
+ assert self._xpathCtxt is not NULL, "XPath context not initialised"
+ document = _documentOrRaise(_etree_or_element)
+ element = _rootNodeOrRaise(_etree_or_element)
+
+ # The lock is held for the whole evaluation; the finally clause below
+ # undoes the context registration and releases the lock again.
+ self._lock()
+ self._xpathCtxt.doc = document._c_doc
+ self._xpathCtxt.node = element._c_node
+
+ try:
+ self._context.register_context(document)
+ self._context.registerVariables(_variables)
+ # The precompiled expression is evaluated with the GIL released.
+ with nogil:
+ xpathObj = xpath.xmlXPathCompiledEval(
+ self._xpath, self._xpathCtxt)
+ result = self._handle_result(xpathObj, document)
+ finally:
+ self._context.unregister_context()
+ self._unlock()
+ return result
+
+ @property
+ def path(self):
+ """The literal XPath expression.
+ """
+ return self._path.decode('UTF-8')
+
+ def __dealloc__(self):
+ if self._xpath is not NULL:
+ xpath.xmlXPathFreeCompExpr(self._xpath)
+
+ # The repr is the expression text itself, not a constructor-style string.
+ def __repr__(self):
+ return self.path
+
+
+# Bound ``sub`` of a bytes pattern matching a double-quoted or single-quoted
+# string literal (no escape handling).
+cdef object _replace_strings = re.compile(b'("[^"]*")|(\'[^\']*\')').sub
+# Bound ``findall`` of a bytes pattern matching ElementTree-style ``{uri}``
+# namespace markers, including the braces.
+cdef object _find_namespaces = re.compile(b'({[^}]+})').findall
+
+cdef class ETXPath(XPath):
+    """ETXPath(self, path, extensions=None, regexp=True, smart_strings=True)
+    Special XPath class that supports the ElementTree {uri} notation for namespaces.
+
+    Note that this class does not accept the ``namespaces`` keyword
+    argument.  All namespaces must be passed as part of the path
+    string.  Smart strings will be returned for string results unless
+    you pass ``smart_strings=False``.
+    """
+    def __init__(self, path, *, extensions=None, regexp=True,
+                 smart_strings=True):
+        path, namespaces = self._nsextract_path(path)
+        XPath.__init__(self, path, namespaces=namespaces,
+                       extensions=extensions, regexp=regexp,
+                       smart_strings=smart_strings)
+
+    cdef _nsextract_path(self, path):
+        """Replace ``{uri}`` markers in ``path`` by generated prefixes.
+
+        Returns a ``(path, namespaces)`` tuple where ``path`` is the
+        rewritten expression (str) and ``namespaces`` maps each generated
+        prefix (``__xpp01``, ``__xpp02``, ...) to its namespace URI.
+        Prefixes are numbered in order of first appearance.  Text inside
+        quoted XPath string literals is left untouched.
+        """
+        cdef dict namespaces = {}
+        cdef dict prefix_by_def = {}
+        cdef list segments
+        cdef int i
+        path_utf = _utf8(path)
+        # The pattern has exactly one capturing group, so re.split() returns
+        # the text outside of string literals at the even indices and the
+        # literals themselves (quotes included) at the odd indices.
+        segments = re.split(b'("[^"]*"|\'[^\']*\')', path_utf)
+        i = 1
+        for pos in range(0, len(segments), 2):
+            segment = segments[pos]
+            for namespace_def in _find_namespaces(segment):
+                if namespace_def not in prefix_by_def:
+                    prefix = python.PyBytes_FromFormat("__xpp%02d", i)
+                    i += 1
+                    namespace = namespace_def[1:-1]  # remove '{}'
+                    namespace = (<bytes>namespace).decode('utf8')
+                    namespaces[prefix.decode('utf8')] = namespace
+                    prefix_by_def[namespace_def] = prefix + b':'
+                # Only the non-literal segment is rewritten, so a '{uri}'
+                # inside a quoted string keeps its original text.
+                segment = segment.replace(
+                    namespace_def, prefix_by_def[namespace_def])
+            segments[pos] = segment
+        path = b''.join(segments).decode('utf8')
+        return path, namespaces