diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/xpath.pxi | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/xpath.pxi')
-rw-r--r-- | .venv/lib/python3.12/site-packages/lxml/xpath.pxi | 487 |
1 files changed, 487 insertions, 0 deletions
# XPath evaluation

class XPathSyntaxError(LxmlSyntaxError, XPathError):
    """Error raised when an XPath expression cannot be compiled.

    Instances are built by ``_XPathEvaluatorBase._build_parse_error()`` and
    raised by ``XPath.__init__()`` when libxml2 fails to compile the path.
    """


################################################################################
# XPath

# libxml2 error codes that are reported as syntax (compile time) problems.
cdef object _XPATH_SYNTAX_ERRORS = (
    xmlerror.XML_XPATH_NUMBER_ERROR,
    xmlerror.XML_XPATH_UNFINISHED_LITERAL_ERROR,
    xmlerror.XML_XPATH_VARIABLE_REF_ERROR,
    xmlerror.XML_XPATH_INVALID_PREDICATE_ERROR,
    xmlerror.XML_XPATH_UNCLOSED_ERROR,
    xmlerror.XML_XPATH_INVALID_CHAR_ERROR
)

# libxml2 error codes that are reported as evaluation (run time) problems.
cdef object _XPATH_EVAL_ERRORS = (
    xmlerror.XML_XPATH_UNDEF_VARIABLE_ERROR,
    xmlerror.XML_XPATH_UNDEF_PREFIX_ERROR,
    xmlerror.XML_XPATH_UNKNOWN_FUNC_ERROR,
    xmlerror.XML_XPATH_INVALID_OPERAND,
    xmlerror.XML_XPATH_INVALID_TYPE,
    xmlerror.XML_XPATH_INVALID_ARITY,
    xmlerror.XML_XPATH_INVALID_CTXT_SIZE,
    xmlerror.XML_XPATH_INVALID_CTXT_POSITION
)


cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Bind the generic extension function dispatcher to the given name in the
    # XPath context, with or without a namespace.  Returns the libxml2 status.
    if ns_utf is None:
        return xpath.xmlXPathRegisterFunc(
            <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf),
            _xpath_function_call)
    else:
        return xpath.xmlXPathRegisterFuncNS(
            <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf),
            _xpath_function_call)


cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Counterpart of _register_xpath_function(): registers NULL as the
    # implementation for the given (namespaced) function name.
    if ns_utf is None:
        return xpath.xmlXPathRegisterFunc(
            <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf), NULL)
    else:
        return xpath.xmlXPathRegisterFuncNS(
            <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf), NULL)


@cython.final
@cython.internal
cdef class _XPathContext(_BaseContext):
    # Evaluation context of the XPath evaluators in this file.  On top of
    # _BaseContext it keeps an optional dict of XPath variables that gets
    # registered on every register_context() call.
    cdef object _variables
    def __init__(self, namespaces, extensions, error_log, enable_regexp, variables,
                 build_smart_strings):
        self._variables = variables
        _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp,
                              build_smart_strings)

    cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
        # Attach the libxml2 context and register the evaluator-local
        # namespaces and extension functions on it.
        self._set_xpath_context(xpathCtxt)
        # This would be a good place to set up the XPath parser dict, but
        # we cannot use the current thread dict as we do not know which
        # thread will execute the XPath evaluator - so, no dict for now.
        self.registerLocalNamespaces()
        self.registerLocalFunctions(xpathCtxt, _register_xpath_function)

    cdef register_context(self, _Document doc):
        # Per-evaluation setup: global namespaces and functions, EXSLT
        # functions and the variables passed to the constructor (if any).
        # Must be paired with unregister_context().
        self._register_context(doc)
        self.registerGlobalNamespaces()
        self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
        self.registerExsltFunctions()
        if self._variables is not None:
            self.registerVariables(self._variables)

    cdef unregister_context(self):
        # Per-evaluation teardown: undo register_context() and drop all
        # variables that were registered on the libxml2 context.
        self.unregisterGlobalFunctions(
            self._xpathCtxt, _unregister_xpath_function)
        self.unregisterGlobalNamespaces()
        xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
        self._cleanup_context()

    cdef void registerExsltFunctions(self) noexcept:
        # Walk the namespace hash of the context and register the EXSLT
        # function sets for every known EXSLT namespace found in it.
        if xslt.LIBXSLT_VERSION < 10125:
            # we'd only execute dummy functions anyway
            return
        tree.xmlHashScan(
            self._xpathCtxt.nsHash, _registerExsltFunctionsForNamespaces,
            self._xpathCtxt)

    cdef registerVariables(self, variable_dict):
        # Register every name/value pair of the dict as an XPath variable.
        for name, value in variable_dict.items():
            self.registerVariable(name, value)

    cdef registerVariable(self, name, value):
        # Register a single XPath variable; the value is wrapped into an
        # XPath object by _wrapXPathObject().
        name_utf = self._to_utf(name)
        xpath.xmlXPathRegisterVariable(
            self._xpathCtxt, _xcstr(name_utf), _wrapXPathObject(value, None, None))


cdef void _registerExsltFunctionsForNamespaces(
        void* _c_href, void* _ctxt, const_xmlChar* c_prefix) noexcept:
    # xmlHashScan() callback used by _XPathContext.registerExsltFunctions():
    # receives one namespace URI / prefix pair and the XPath context, and
    # registers the matching EXSLT function set under that prefix.
    c_href = <const_xmlChar*> _c_href
    ctxt = <xpath.xmlXPathContext*> _ctxt

    if tree.xmlStrcmp(c_href, xslt.EXSLT_DATE_NAMESPACE) == 0:
        xslt.exsltDateXpathCtxtRegister(ctxt, c_prefix)
    elif tree.xmlStrcmp(c_href, xslt.EXSLT_SETS_NAMESPACE) == 0:
        xslt.exsltSetsXpathCtxtRegister(ctxt, c_prefix)
    elif tree.xmlStrcmp(c_href, xslt.EXSLT_MATH_NAMESPACE) == 0:
        xslt.exsltMathXpathCtxtRegister(ctxt, c_prefix)
    elif tree.xmlStrcmp(c_href, xslt.EXSLT_STRINGS_NAMESPACE) == 0:
        xslt.exsltStrXpathCtxtRegister(ctxt, c_prefix)


cdef class _XPathEvaluatorBase:
    # Shared machinery of all XPath evaluators: owns the libxml2 XPath
    # context, the _XPathContext wrapper, an error log and (when threading
    # is enabled) a lock that serialises evaluations on this object.
    cdef xpath.xmlXPathContext* _xpathCtxt
    cdef _XPathContext _context
    cdef python.PyThread_type_lock _eval_lock
    cdef _ErrorLog _error_log
    def __cinit__(self):
        self._xpathCtxt = NULL
        if config.ENABLE_THREADING:
            self._eval_lock = python.PyThread_allocate_lock()
            if self._eval_lock is NULL:
                raise MemoryError()
        self._error_log = _ErrorLog()

    def __init__(self, namespaces, extensions, enable_regexp,
                 smart_strings):
        self._context = _XPathContext(namespaces, extensions, self._error_log,
                                      enable_regexp, None, smart_strings)

    @property
    def error_log(self):
        """A copy of the error log of this evaluator."""
        assert self._error_log is not None, "XPath evaluator not initialised"
        return self._error_log.copy()

    def __dealloc__(self):
        if self._xpathCtxt is not NULL:
            xpath.xmlXPathFreeContext(self._xpathCtxt)
        if config.ENABLE_THREADING:
            if self._eval_lock is not NULL:
                python.PyThread_free_lock(self._eval_lock)

    cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
        # Take ownership of the libxml2 context (freed in __dealloc__) and
        # hand it on to the _XPathContext wrapper.
        self._xpathCtxt = xpathCtxt
        self._context.set_context(xpathCtxt)

    cdef bint _checkAbsolutePath(self, char* path) noexcept:
        # True if the first character after leading spaces/tabs is '/'.
        cdef char c
        if path is NULL:
            return 0
        c = path[0]
        while c == c' ' or c == c'\t':
            path = path + 1
            c = path[0]
        return c == c'/'

    @cython.final
    cdef int _lock(self) except -1:
        # Acquire the evaluation lock (blocking, with the GIL released).
        # Raises XPathError if the lock could not be acquired.  No-op when
        # threading is disabled or no lock was allocated.
        cdef int result
        if config.ENABLE_THREADING and self._eval_lock != NULL:
            with nogil:
                result = python.PyThread_acquire_lock(
                    self._eval_lock, python.WAIT_LOCK)
            if result == 0:
                raise XPathError("XPath evaluator locking failed")
        return 0

    @cython.final
    cdef void _unlock(self) noexcept:
        # Release the lock taken by _lock().
        if config.ENABLE_THREADING and self._eval_lock != NULL:
            python.PyThread_release_lock(self._eval_lock)

    cdef _build_parse_error(self):
        # Build (not raise) an XPathSyntaxError, preferring the message of
        # logged syntax errors over the generic fallback message.
        cdef _BaseErrorLog entries
        entries = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
        if entries:
            message = entries._buildExceptionMessage(None)
            if message is not None:
                return XPathSyntaxError(message, self._error_log)
        return XPathSyntaxError(
            self._error_log._buildExceptionMessage("Error in xpath expression"),
            self._error_log)

    cdef _build_eval_error(self):
        # Build (not raise) an XPathEvalError.  Logged evaluation errors are
        # preferred, then logged syntax errors, then the generic message.
        cdef _BaseErrorLog entries
        entries = self._error_log.filter_types(_XPATH_EVAL_ERRORS)
        if not entries:
            entries = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
        if entries:
            message = entries._buildExceptionMessage(None)
            if message is not None:
                return XPathEvalError(message, self._error_log)
        return XPathEvalError(
            self._error_log._buildExceptionMessage("Error in xpath expression"),
            self._error_log)

    cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc):
        # Turn the raw libxml2 result into a Python object.  Always frees
        # xpathObj and releases the temporary references of the context.
        # Re-raises an exception stored during evaluation, or raises
        # XPathEvalError if libxml2 returned no result.
        if self._context._exc._has_raised():
            if xpathObj is not NULL:
                _freeXPathObject(xpathObj)
                xpathObj = NULL
            self._context._release_temp_refs()
            self._context._exc._raise_if_stored()

        if xpathObj is NULL:
            self._context._release_temp_refs()
            raise self._build_eval_error()

        try:
            result = _unwrapXPathObject(xpathObj, doc, self._context)
        finally:
            _freeXPathObject(xpathObj)
            self._context._release_temp_refs()

        return result


cdef class XPathElementEvaluator(_XPathEvaluatorBase):
    """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    Create an XPath evaluator for an element.

    Absolute XPath expressions (starting with '/') will be evaluated against
    the ElementTree as returned by getroottree().

    Additional namespace declarations can be passed with the
    'namespaces' keyword argument.  EXSLT regular expression support
    can be disabled with the 'regexp' boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    cdef _Element _element
    def __init__(self, _Element element not None, *, namespaces=None,
                 extensions=None, regexp=True, smart_strings=True):
        cdef xpath.xmlXPathContext* xpathCtxt
        cdef _Document doc
        _assertValidNode(element)
        _assertValidDoc(element._doc)
        self._element = element
        doc = element._doc
        _XPathEvaluatorBase.__init__(self, namespaces, extensions,
                                     regexp, smart_strings)
        xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
        if xpathCtxt is NULL:
            raise MemoryError()
        self.set_context(xpathCtxt)

    def register_namespace(self, prefix, uri):
        """Register a namespace with the XPath context.
        """
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        self._context.addNamespace(prefix, uri)

    def register_namespaces(self, namespaces):
        """Register a prefix -> uri dict.
        """
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        for prefix, uri in namespaces.items():
            self._context.addNamespace(prefix, uri)

    def __call__(self, _path, **_variables):
        """__call__(self, _path, **_variables)

        Evaluate an XPath expression on the document.

        Variables may be provided as keyword arguments.  Note that namespaces
        are currently not supported for variables.

        Absolute XPath expressions (starting with '/') will be evaluated
        against the ElementTree as returned by getroottree().
        """
        cdef xpath.xmlXPathObject* xpathObj
        cdef _Document doc
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        path = _utf8(_path)
        doc = self._element._doc

        self._lock()
        try:
            self._xpathCtxt.node = self._element._c_node
            try:
                self._context.register_context(doc)
                self._context.registerVariables(_variables)
                c_path = _xcstr(path)
                with nogil:
                    xpathObj = xpath.xmlXPathEvalExpression(
                        c_path, self._xpathCtxt)
                result = self._handle_result(xpathObj, doc)
            finally:
                self._context.unregister_context()
        finally:
            # separate 'finally' so that the lock is released even if the
            # context cleanup above fails
            self._unlock()

        return result


cdef class XPathDocumentEvaluator(XPathElementEvaluator):
    """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    Create an XPath evaluator for an ElementTree.

    Additional namespace declarations can be passed with the
    'namespaces' keyword argument.  EXSLT regular expression support
    can be disabled with the 'regexp' boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    def __init__(self, _ElementTree etree not None, *, namespaces=None,
                 extensions=None, regexp=True, smart_strings=True):
        XPathElementEvaluator.__init__(
            self, etree._context_node, namespaces=namespaces,
            extensions=extensions, regexp=regexp,
            smart_strings=smart_strings)

    def __call__(self, _path, **_variables):
        """__call__(self, _path, **_variables)

        Evaluate an XPath expression on the document.

        Variables may be provided as keyword arguments.  Note that namespaces
        are currently not supported for variables.
        """
        cdef xpath.xmlXPathObject* xpathObj
        cdef xmlDoc* c_doc
        cdef _Document doc
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        path = _utf8(_path)
        doc = self._element._doc

        self._lock()
        try:
            self._context.register_context(doc)
            # evaluate against a (possibly fake) document that is rooted at
            # the context node of the ElementTree
            c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
            try:
                self._context.registerVariables(_variables)
                c_path = _xcstr(path)
                with nogil:
                    self._xpathCtxt.doc  = c_doc
                    self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
                    xpathObj = xpath.xmlXPathEvalExpression(
                        c_path, self._xpathCtxt)
                result = self._handle_result(xpathObj, doc)
            finally:
                _destroyFakeDoc(doc._c_doc, c_doc)
                self._context.unregister_context()
        finally:
            self._unlock()

        return result


def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None,
                   regexp=True, smart_strings=True):
    """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True, smart_strings=True)

    Creates an XPath evaluator for an ElementTree or an Element.

    The resulting object can be called with an XPath expression as argument
    and XPath variables provided as keyword arguments.

    Additional namespace declarations can be passed with the
    'namespaces' keyword argument.  EXSLT regular expression support
    can be disabled with the 'regexp' boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    if isinstance(etree_or_element, _ElementTree):
        return XPathDocumentEvaluator(
            etree_or_element, namespaces=namespaces,
            extensions=extensions, regexp=regexp, smart_strings=smart_strings)
    else:
        return XPathElementEvaluator(
            etree_or_element, namespaces=namespaces,
            extensions=extensions, regexp=regexp, smart_strings=smart_strings)


cdef class XPath(_XPathEvaluatorBase):
    """XPath(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    A compiled XPath expression that can be called on Elements and ElementTrees.

    Besides the XPath expression, you can pass prefix-namespace
    mappings and extension functions to the constructor through the
    keyword arguments ``namespaces`` and ``extensions``.  EXSLT
    regular expression support can be disabled with the 'regexp'
    boolean keyword (defaults to True).  Smart strings will be
    returned for string results unless you pass
    ``smart_strings=False``.
    """
    cdef xpath.xmlXPathCompExpr* _xpath
    cdef bytes _path
    def __cinit__(self):
        self._xpath = NULL

    def __init__(self, path, *, namespaces=None, extensions=None,
                 regexp=True, smart_strings=True):
        cdef xpath.xmlXPathContext* xpathCtxt
        _XPathEvaluatorBase.__init__(self, namespaces, extensions,
                                     regexp, smart_strings)
        self._path = _utf8(path)
        # the context is created without a document, the document and
        # context node are set on each call
        xpathCtxt = xpath.xmlXPathNewContext(NULL)
        if xpathCtxt is NULL:
            raise MemoryError()
        self.set_context(xpathCtxt)
        self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _xcstr(self._path))
        if self._xpath is NULL:
            raise self._build_parse_error()

    def __call__(self, _etree_or_element, **_variables):
        """__call__(self, _etree_or_element, **_variables)

        Evaluate the compiled expression against an Element or ElementTree.
        XPath variables may be provided as keyword arguments.
        """
        cdef xpath.xmlXPathObject*  xpathObj
        cdef _Document document
        cdef _Element element

        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        document = _documentOrRaise(_etree_or_element)
        element  = _rootNodeOrRaise(_etree_or_element)

        self._lock()
        try:
            self._xpathCtxt.doc  = document._c_doc
            self._xpathCtxt.node = element._c_node
            try:
                self._context.register_context(document)
                self._context.registerVariables(_variables)
                with nogil:
                    xpathObj = xpath.xmlXPathCompiledEval(
                        self._xpath, self._xpathCtxt)
                result = self._handle_result(xpathObj, document)
            finally:
                self._context.unregister_context()
        finally:
            # separate 'finally' so that the lock is released even if the
            # context cleanup above fails
            self._unlock()
        return result

    @property
    def path(self):
        """The literal XPath expression.
        """
        return self._path.decode('UTF-8')

    def __dealloc__(self):
        if self._xpath is not NULL:
            xpath.xmlXPathFreeCompExpr(self._xpath)

    def __repr__(self):
        return self.path


cdef object _replace_strings = re.compile(b'("[^"]*")|(\'[^\']*\')').sub
cdef object _find_namespaces = re.compile(b'({[^}]+})').findall
# Scans a path for string literals (group 1) and '{namespace}' parts (group 2).
# Literals come first in the alternation so that braces inside of a literal
# are consumed as part of the literal and never seen as a namespace.
cdef object _scan_literals_and_namespaces = re.compile(
    b'("[^"]*"|\'[^\']*\')|({[^}]+})').finditer


cdef class ETXPath(XPath):
    """ETXPath(self, path, extensions=None, regexp=True, smart_strings=True)
    Special XPath class that supports the ElementTree {uri} notation for namespaces.

    Note that this class does not accept the ``namespaces`` keyword
    argument.  All namespaces must be passed as part of the path
    string.  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    def __init__(self, path, *, extensions=None, regexp=True,
                 smart_strings=True):
        path, namespaces = self._nsextract_path(path)
        XPath.__init__(self, path, namespaces=namespaces,
                       extensions=extensions, regexp=regexp,
                       smart_strings=smart_strings)

    cdef _nsextract_path(self, path):
        # Replace each '{namespace}' in the path by a generated prefix
        # ('__xpp01:', '__xpp02:', ...) and return the rewritten path (str)
        # together with the prefix -> namespace dict.  String literals in
        # the path are left untouched.
        cdef dict namespaces = {}
        cdef dict prefixes = {}   # b'{namespace}' -> b'prefix:'
        cdef list parts = []
        cdef int i
        path_utf = _utf8(path)
        stripped_path = _replace_strings(b'', path_utf) # remove string literals
        i = 1
        for namespace_def in _find_namespaces(stripped_path):
            if namespace_def not in prefixes:
                prefix = python.PyBytes_FromFormat("__xpp%02d", i)
                i += 1
                namespace = namespace_def[1:-1] # remove '{}'
                namespace = (<bytes>namespace).decode('utf8')
                namespaces[prefix.decode('utf8')] = namespace
                prefixes[namespace_def] = prefix + b':'

        # Rebuild the path in a single pass.  Note: no closures here, they
        # are not available in cdef functions.
        pos = 0
        for found in _scan_literals_and_namespaces(path_utf):
            namespace_def = found.group(2)
            if namespace_def is None:
                continue  # string literal => keep as is
            prefix_str = prefixes.get(namespace_def)
            if prefix_str is None:
                continue  # not a namespace that was detected above
            parts.append(path_utf[pos:found.start()])
            parts.append(prefix_str)
            pos = found.end()
        parts.append(path_utf[pos:])
        path_utf = b''.join(parts)

        path = path_utf.decode('utf8')
        return path, namespaces