diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml')
166 files changed, 57457 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/ElementInclude.py b/.venv/lib/python3.12/site-packages/lxml/ElementInclude.py new file mode 100644 index 00000000..21884336 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/ElementInclude.py @@ -0,0 +1,244 @@ +# +# ElementTree +# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $ +# +# limited xinclude support for element trees +# +# history: +# 2003-08-15 fl created +# 2003-11-14 fl fixed default loader +# +# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2004 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +""" +Limited XInclude support for the ElementTree package. + +While lxml.etree has full support for XInclude (see +`etree.ElementTree.xinclude()`), this module provides a simpler, pure +Python, ElementTree compatible implementation that supports a simple +form of custom URL resolvers. +""" + +from lxml import etree +try: + from urlparse import urljoin + from urllib2 import urlopen +except ImportError: + # Python 3 + from urllib.parse import urljoin + from urllib.request import urlopen + +XINCLUDE = "{http://www.w3.org/2001/XInclude}" + +XINCLUDE_INCLUDE = XINCLUDE + "include" +XINCLUDE_FALLBACK = XINCLUDE + "fallback" +XINCLUDE_ITER_TAG = XINCLUDE + "*" + +# For security reasons, the inclusion depth is limited to this read-only value by default. +DEFAULT_MAX_INCLUSION_DEPTH = 6 + + +## +# Fatal include error. + +class FatalIncludeError(etree.LxmlSyntaxError): + pass + + +class LimitedRecursiveIncludeError(FatalIncludeError): + pass + + +## +# ET compatible default loader. +# This loader reads an included resource from disk. +# +# @param href Resource reference. +# @param parse Parse mode. Either "xml" or "text". +# @param encoding Optional text encoding. +# @return The expanded resource. If the parse mode is "xml", this +# is an ElementTree instance. If the parse mode is "text", this +# is a Unicode string. If the loader fails, it can return None +# or raise an IOError exception. +# @throws IOError If the loader fails to load the resource. + +def default_loader(href, parse, encoding=None): + file = open(href, 'rb') + if parse == "xml": + data = etree.parse(file).getroot() + else: + data = file.read() + if not encoding: + encoding = 'utf-8' + data = data.decode(encoding) + file.close() + return data + + +## +# Default loader used by lxml.etree - handles custom resolvers properly +# + +def _lxml_default_loader(href, parse, encoding=None, parser=None): + if parse == "xml": + data = etree.parse(href, parser).getroot() + else: + if "://" in href: + f = urlopen(href) + else: + f = open(href, 'rb') + data = f.read() + f.close() + if not encoding: + encoding = 'utf-8' + data = data.decode(encoding) + return data + + +## +# Wrapper for ET compatibility - drops the parser + +def _wrap_et_loader(loader): + def load(href, parse, encoding=None, parser=None): + return loader(href, parse, encoding) + return load + + +## +# Expand XInclude directives. +# +# @param elem Root element. +# @param loader Optional resource loader. If omitted, it defaults +# to {@link default_loader}. If given, it should be a callable +# that implements the same interface as <b>default_loader</b>. +# @param base_url The base URL of the original file, to resolve +# relative include file references. +# @param max_depth The maximum number of recursive inclusions. +# Limited to reduce the risk of malicious content explosion. +# Pass None to disable the limitation. +# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded. +# @throws FatalIncludeError If the function fails to include a given +# resource, or if the tree contains malformed XInclude elements. +# @throws IOError If the function fails to load a given resource. +# @returns the node or its replacement if it was an XInclude node + +def include(elem, loader=None, base_url=None, + max_depth=DEFAULT_MAX_INCLUSION_DEPTH): + if max_depth is None: + max_depth = -1 + elif max_depth < 0: + raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth) + + if base_url is None: + if hasattr(elem, 'getroot'): + tree = elem + elem = elem.getroot() + else: + tree = elem.getroottree() + if hasattr(tree, 'docinfo'): + base_url = tree.docinfo.URL + elif hasattr(elem, 'getroot'): + elem = elem.getroot() + _include(elem, loader, base_url, max_depth) + + +def _include(elem, loader=None, base_url=None, + max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None): + if loader is not None: + load_include = _wrap_et_loader(loader) + else: + load_include = _lxml_default_loader + + if _parent_hrefs is None: + _parent_hrefs = set() + + parser = elem.getroottree().parser + + include_elements = list( + elem.iter(XINCLUDE_ITER_TAG)) + + for e in include_elements: + if e.tag == XINCLUDE_INCLUDE: + # process xinclude directive + href = urljoin(base_url, e.get("href")) + parse = e.get("parse", "xml") + parent = e.getparent() + if parse == "xml": + if href in _parent_hrefs: + raise FatalIncludeError( + "recursive include of %r detected" % href + ) + if max_depth == 0: + raise LimitedRecursiveIncludeError( + "maximum xinclude depth reached when including file %s" % href) + node = load_include(href, parse, parser=parser) + if node is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs) + if e.tail: + node.tail = (node.tail or "") + e.tail + if parent is None: + return node # replaced the root node! + parent.replace(e, node) + elif parse == "text": + text = load_include(href, parse, encoding=e.get("encoding")) + if text is None: + raise FatalIncludeError( + "cannot load %r as %r" % (href, parse) + ) + predecessor = e.getprevious() + if predecessor is not None: + predecessor.tail = (predecessor.tail or "") + text + elif parent is None: + return text # replaced the root node! + else: + parent.text = (parent.text or "") + text + (e.tail or "") + parent.remove(e) + else: + raise FatalIncludeError( + "unknown parse type in xi:include tag (%r)" % parse + ) + elif e.tag == XINCLUDE_FALLBACK: + parent = e.getparent() + if parent is not None and parent.tag != XINCLUDE_INCLUDE: + raise FatalIncludeError( + "xi:fallback tag must be child of xi:include (%r)" % e.tag + ) + else: + raise FatalIncludeError( + "Invalid element found in XInclude namespace (%r)" % e.tag + ) + return elem diff --git a/.venv/lib/python3.12/site-packages/lxml/__init__.py b/.venv/lib/python3.12/site-packages/lxml/__init__.py new file mode 100644 index 00000000..45cee20a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/__init__.py @@ -0,0 +1,22 @@ +# this is a package + +__version__ = "5.3.1" + + +def get_include(): + """ + Returns a list of header include paths (for lxml itself, libxml2 + and libxslt) needed to compile C code against lxml if it was built + with statically linked libraries. + """ + import os + lxml_path = __path__[0] + include_path = os.path.join(lxml_path, 'includes') + includes = [include_path, lxml_path] + + for name in os.listdir(include_path): + path = os.path.join(include_path, name) + if os.path.isdir(path): + includes.append(path) + + return includes diff --git a/.venv/lib/python3.12/site-packages/lxml/_elementpath.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/lxml/_elementpath.cpython-312-x86_64-linux-gnu.so Binary files differnew file mode 100755 index 00000000..866b7c15 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/_elementpath.cpython-312-x86_64-linux-gnu.so diff --git a/.venv/lib/python3.12/site-packages/lxml/_elementpath.py b/.venv/lib/python3.12/site-packages/lxml/_elementpath.py new file mode 100644 index 00000000..6233a635 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/_elementpath.py @@ -0,0 +1,341 @@ +# cython: language_level=2 + +# +# ElementTree +# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $ +# +# limited xpath support for element trees +# +# history: +# 2003-05-23 fl created +# 2003-05-28 fl added support for // etc +# 2003-08-27 fl fixed parsing of periods in element names +# 2007-09-10 fl new selection engine +# 2007-09-12 fl fixed parent selector +# 2007-09-13 fl added iterfind; changed findall to return a list +# 2007-11-30 fl added namespaces support +# 2009-10-30 fl added child element value filter +# +# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved. +# +# fredrik@pythonware.com +# http://www.pythonware.com +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2009 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +## +# Implementation module for XPath support. There's usually no reason +# to import this module directly; the <b>ElementTree</b> does this for +# you, if needed. +## + + +import re + +xpath_tokenizer_re = re.compile( + "(" + "'[^']*'|\"[^\"]*\"|" + "::|" + "//?|" + r"\.\.|" + r"\(\)|" + r"[/.*:\[\]\(\)@=])|" + r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|" + r"\s+" + ) + +def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True): + # ElementTree uses '', lxml used None originally. + default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None + parsing_attribute = False + for token in xpath_tokenizer_re.findall(pattern): + ttype, tag = token + if tag and tag[0] != "{": + if ":" in tag and with_prefixes: + prefix, uri = tag.split(":", 1) + try: + if not namespaces: + raise KeyError + yield ttype, "{%s}%s" % (namespaces[prefix], uri) + except KeyError: + raise SyntaxError("prefix %r not found in prefix map" % prefix) + elif default_namespace and not parsing_attribute: + yield ttype, "{%s}%s" % (default_namespace, tag) + else: + yield token + parsing_attribute = False + else: + yield token + parsing_attribute = ttype == '@' + + +def prepare_child(next, token): + tag = token[1] + def select(result): + for elem in result: + yield from elem.iterchildren(tag) + return select + +def prepare_star(next, token): + def select(result): + for elem in result: + yield from elem.iterchildren('*') + return select + +def prepare_self(next, token): + def select(result): + return result + return select + +def prepare_descendant(next, token): + token = next() + if token[0] == "*": + tag = "*" + elif not token[0]: + tag = token[1] + else: + raise SyntaxError("invalid descendant") + def select(result): + for elem in result: + yield from elem.iterdescendants(tag) + return select + +def prepare_parent(next, token): + def select(result): + for elem in result: + parent = elem.getparent() + if parent is not None: + yield parent + return select + +def prepare_predicate(next, token): + # FIXME: replace with real parser!!! refs: + # http://effbot.org/zone/simple-iterator-parser.htm + # http://javascript.crockford.com/tdop/tdop.html + signature = '' + predicate = [] + while 1: + token = next() + if token[0] == "]": + break + if token == ('', ''): + # ignore whitespace + continue + if token[0] and token[0][:1] in "'\"": + token = "'", token[0][1:-1] + signature += token[0] or "-" + predicate.append(token[1]) + + # use signature to determine predicate type + if signature == "@-": + # [@attribute] predicate + key = predicate[1] + def select(result): + for elem in result: + if elem.get(key) is not None: + yield elem + return select + if signature == "@-='": + # [@attribute='value'] + key = predicate[1] + value = predicate[-1] + def select(result): + for elem in result: + if elem.get(key) == value: + yield elem + return select + if signature == "-" and not re.match(r"-?\d+$", predicate[0]): + # [tag] + tag = predicate[0] + def select(result): + for elem in result: + for _ in elem.iterchildren(tag): + yield elem + break + return select + if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])): + # [.='value'] or [tag='value'] + tag = predicate[0] + value = predicate[-1] + if tag: + def select(result): + for elem in result: + for e in elem.iterchildren(tag): + if "".join(e.itertext()) == value: + yield elem + break + else: + def select(result): + for elem in result: + if "".join(elem.itertext()) == value: + yield elem + return select + if signature == "-" or signature == "-()" or signature == "-()-": + # [index] or [last()] or [last()-index] + if signature == "-": + # [index] + index = int(predicate[0]) - 1 + if index < 0: + if index == -1: + raise SyntaxError( + "indices in path predicates are 1-based, not 0-based") + else: + raise SyntaxError("path index >= 1 expected") + else: + if predicate[0] != "last": + raise SyntaxError("unsupported function") + if signature == "-()-": + try: + index = int(predicate[2]) - 1 + except ValueError: + raise SyntaxError("unsupported expression") + else: + index = -1 + def select(result): + for elem in result: + parent = elem.getparent() + if parent is None: + continue + try: + # FIXME: what if the selector is "*" ? + elems = list(parent.iterchildren(elem.tag)) + if elems[index] is elem: + yield elem + except IndexError: + pass + return select + raise SyntaxError("invalid predicate") + +ops = { + "": prepare_child, + "*": prepare_star, + ".": prepare_self, + "..": prepare_parent, + "//": prepare_descendant, + "[": prepare_predicate, +} + + +# -------------------------------------------------------------------- + +_cache = {} + + +def _build_path_iterator(path, namespaces, with_prefixes=True): + """compile selector pattern""" + if path[-1:] == "/": + path += "*" # implicit all (FIXME: keep this?) + + cache_key = (path,) + if namespaces: + # lxml originally used None for the default namespace but ElementTree uses the + # more convenient (all-strings-dict) empty string, so we support both here, + # preferring the more convenient '', as long as they aren't ambiguous. + if None in namespaces: + if '' in namespaces and namespaces[None] != namespaces['']: + raise ValueError("Ambiguous default namespace provided: %r versus %r" % ( + namespaces[None], namespaces[''])) + cache_key += (namespaces[None],) + tuple(sorted( + item for item in namespaces.items() if item[0] is not None)) + else: + cache_key += tuple(sorted(namespaces.items())) + + try: + return _cache[cache_key] + except KeyError: + pass + if len(_cache) > 100: + _cache.clear() + + if path[:1] == "/": + raise SyntaxError("cannot use absolute path on element") + stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes)) + try: + _next = stream.next + except AttributeError: + # Python 3 + _next = stream.__next__ + try: + token = _next() + except StopIteration: + raise SyntaxError("empty path expression") + selector = [] + while 1: + try: + selector.append(ops[token[0]](_next, token)) + except StopIteration: + raise SyntaxError("invalid path") + try: + token = _next() + if token[0] == "/": + token = _next() + except StopIteration: + break + _cache[cache_key] = selector + return selector + + +## +# Iterate over the matching nodes + +def iterfind(elem, path, namespaces=None, with_prefixes=True): + selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes) + result = iter((elem,)) + for select in selector: + result = select(result) + return result + + +## +# Find first matching object. + +def find(elem, path, namespaces=None, with_prefixes=True): + it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes) + try: + return next(it) + except StopIteration: + return None + + +## +# Find all matching objects. + +def findall(elem, path, namespaces=None, with_prefixes=True): + return list(iterfind(elem, path, namespaces)) + + +## +# Find text for first matching object. + +def findtext(elem, path, default=None, namespaces=None, with_prefixes=True): + el = find(elem, path, namespaces, with_prefixes=with_prefixes) + if el is None: + return default + else: + return el.text or '' diff --git a/.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi b/.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi new file mode 100644 index 00000000..fb60af7d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/apihelpers.pxi @@ -0,0 +1,1793 @@ +# Private/public helper functions for API functions + +from lxml.includes cimport uri + + +cdef void displayNode(xmlNode* c_node, indent) noexcept: + # to help with debugging + cdef xmlNode* c_child + try: + print(indent * ' ', <long>c_node) + c_child = c_node.children + while c_child is not NULL: + displayNode(c_child, indent + 1) + c_child = c_child.next + finally: + return # swallow any exceptions + +cdef inline bint _isHtmlDocument(_Element element) except -1: + cdef xmlNode* c_node = element._c_node + return ( + c_node is not NULL and c_node.doc is not NULL and + c_node.doc.properties & tree.XML_DOC_HTML != 0 + ) + +cdef inline int _assertValidNode(_Element element) except -1: + assert element._c_node is not NULL, "invalid Element proxy at %s" % id(element) + +cdef inline int _assertValidDoc(_Document doc) except -1: + assert doc._c_doc is not NULL, "invalid Document proxy at %s" % id(doc) + +cdef _Document _documentOrRaise(object input): + """Call this to get the document of a _Document, _ElementTree or _Element + object, or to raise an exception if it can't be determined. + + Should be used in all API functions for consistency. + """ + cdef _Document doc + if isinstance(input, _ElementTree): + if (<_ElementTree>input)._context_node is not None: + doc = (<_ElementTree>input)._context_node._doc + else: + doc = None + elif isinstance(input, _Element): + doc = (<_Element>input)._doc + elif isinstance(input, _Document): + doc = <_Document>input + else: + raise TypeError, f"Invalid input object: {python._fqtypename(input).decode('utf8')}" + if doc is None: + raise ValueError, f"Input object has no document: {python._fqtypename(input).decode('utf8')}" + _assertValidDoc(doc) + return doc + +cdef _Element _rootNodeOrRaise(object input): + """Call this to get the root node of a _Document, _ElementTree or + _Element object, or to raise an exception if it can't be determined. + + Should be used in all API functions for consistency. + """ + cdef _Element node + if isinstance(input, _ElementTree): + node = (<_ElementTree>input)._context_node + elif isinstance(input, _Element): + node = <_Element>input + elif isinstance(input, _Document): + node = (<_Document>input).getroot() + else: + raise TypeError, f"Invalid input object: {python._fqtypename(input).decode('utf8')}" + if (node is None or not node._c_node or + node._c_node.type != tree.XML_ELEMENT_NODE): + raise ValueError, f"Input object is not an XML element: {python._fqtypename(input).decode('utf8')}" + _assertValidNode(node) + return node + +cdef bint _isAncestorOrSame(xmlNode* c_ancestor, xmlNode* c_node) noexcept: + while c_node: + if c_node is c_ancestor: + return True + c_node = c_node.parent + return False + +cdef _Element _makeElement(tag, xmlDoc* c_doc, _Document doc, + _BaseParser parser, text, tail, attrib, nsmap, + dict extra_attrs): + """Create a new element and initialize text content, namespaces and + attributes. + + This helper function will reuse as much of the existing document as + possible: + + If 'parser' is None, the parser will be inherited from 'doc' or the + default parser will be used. + + If 'doc' is None, 'c_doc' is used to create a new _Document and the new + element is made its root node. + + If 'c_doc' is also NULL, a new xmlDoc will be created. + """ + cdef xmlNode* c_node + if doc is not None: + c_doc = doc._c_doc + ns_utf, name_utf = _getNsTag(tag) + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name_utf) + if c_doc is NULL: + c_doc = _newHTMLDoc() + else: + _tagValidOrRaise(name_utf) + if c_doc is NULL: + c_doc = _newXMLDoc() + c_node = _createElement(c_doc, name_utf) + if c_node is NULL: + if doc is None and c_doc is not NULL: + tree.xmlFreeDoc(c_doc) + raise MemoryError() + try: + if doc is None: + tree.xmlDocSetRootElement(c_doc, c_node) + doc = _documentFactory(c_doc, parser) + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + # add namespaces to node if necessary + _setNodeNamespaces(c_node, doc, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + return _elementFactory(doc, c_node) + except: + # free allocated c_node/c_doc unless Python does it for us + if c_node.doc is not c_doc: + # node not yet in document => will not be freed by document + if tail is not None: + _removeText(c_node.next) # tail + tree.xmlFreeNode(c_node) + if doc is None: + # c_doc will not be freed by doc + tree.xmlFreeDoc(c_doc) + raise + +cdef int _initNewElement(_Element element, bint is_html, name_utf, ns_utf, + _BaseParser parser, attrib, nsmap, dict extra_attrs) except -1: + """Initialise a new Element object. + + This is used when users instantiate a Python Element subclass + directly, without it being mapped to an existing XML node. + """ + cdef xmlDoc* c_doc + cdef xmlNode* c_node + cdef _Document doc + if is_html: + _htmlTagValidOrRaise(name_utf) + c_doc = _newHTMLDoc() + else: + _tagValidOrRaise(name_utf) + c_doc = _newXMLDoc() + c_node = _createElement(c_doc, name_utf) + if c_node is NULL: + if c_doc is not NULL: + tree.xmlFreeDoc(c_doc) + raise MemoryError() + tree.xmlDocSetRootElement(c_doc, c_node) + doc = _documentFactory(c_doc, parser) + # add namespaces to node if necessary + _setNodeNamespaces(c_node, doc, ns_utf, nsmap) + _initNodeAttributes(c_node, doc, attrib, extra_attrs) + _registerProxy(element, doc, c_node) + element._init() + return 0 + +cdef _Element _makeSubElement(_Element parent, tag, text, tail, + attrib, nsmap, dict extra_attrs): + """Create a new child element and initialize text content, namespaces and + attributes. + """ + cdef xmlNode* c_node + cdef xmlDoc* c_doc + if parent is None or parent._doc is None: + return None + _assertValidNode(parent) + ns_utf, name_utf = _getNsTag(tag) + c_doc = parent._doc._c_doc + + if parent._doc._parser is not None and parent._doc._parser._for_html: + _htmlTagValidOrRaise(name_utf) + else: + _tagValidOrRaise(name_utf) + + c_node = _createElement(c_doc, name_utf) + if c_node is NULL: + raise MemoryError() + tree.xmlAddChild(parent._c_node, c_node) + + try: + if text is not None: + _setNodeText(c_node, text) + if tail is not None: + _setTailText(c_node, tail) + + # add namespaces to node if necessary + _setNodeNamespaces(c_node, parent._doc, ns_utf, nsmap) + _initNodeAttributes(c_node, parent._doc, attrib, extra_attrs) + return _elementFactory(parent._doc, c_node) + except: + # make sure we clean up in case of an error + _removeNode(parent._doc, c_node) + raise + + +cdef int _setNodeNamespaces(xmlNode* c_node, _Document doc, + object node_ns_utf, object nsmap) except -1: + """Lookup current namespace prefixes, then set namespace structure for + node (if 'node_ns_utf' was provided) and register new ns-prefix mappings. + + 'node_ns_utf' should only be passed for a newly created node. + """ + cdef xmlNs* c_ns + cdef list nsdefs + + if nsmap: + for prefix, href in _iter_nsmap(nsmap): + href_utf = _utf8(href) + _uriValidOrRaise(href_utf) + c_href = _xcstr(href_utf) + if prefix is not None: + prefix_utf = _utf8(prefix) + _prefixValidOrRaise(prefix_utf) + c_prefix = _xcstr(prefix_utf) + else: + c_prefix = <const_xmlChar*>NULL + # add namespace with prefix if it is not already known + c_ns = tree.xmlSearchNs(doc._c_doc, c_node, c_prefix) + if c_ns is NULL or \ + c_ns.href is NULL or \ + tree.xmlStrcmp(c_ns.href, c_href) != 0: + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if href_utf == node_ns_utf: + tree.xmlSetNs(c_node, c_ns) + node_ns_utf = None + + if node_ns_utf is not None: + _uriValidOrRaise(node_ns_utf) + doc._setNodeNs(c_node, _xcstr(node_ns_utf)) + return 0 + + +cdef dict _build_nsmap(xmlNode* c_node): + """ + Namespace prefix->URI mapping known in the context of this Element. + This includes all namespace declarations of the parents. + """ + cdef xmlNs* c_ns + nsmap = {} + while c_node is not NULL and c_node.type == tree.XML_ELEMENT_NODE: + c_ns = c_node.nsDef + while c_ns is not NULL: + if c_ns.prefix or c_ns.href: + prefix = funicodeOrNone(c_ns.prefix) + if prefix not in nsmap: + nsmap[prefix] = funicodeOrNone(c_ns.href) + c_ns = c_ns.next + c_node = c_node.parent + return nsmap + + +cdef _iter_nsmap(nsmap): + """ + Create a reproducibly ordered iterable from an nsmap mapping. + Tries to preserve an existing order and sorts if it assumes no order. + + The difference to _iter_attrib() is that None doesn't sort with strings + in Py3.x. + """ + if isinstance(nsmap, dict): + # dicts are insertion-ordered in Py3.6+ => keep the user provided order. + return nsmap.items() + if len(nsmap) <= 1: + return nsmap.items() + # nsmap will usually be a plain unordered dict => avoid type checking overhead + if type(nsmap) is not dict and isinstance(nsmap, OrderedDict): + return nsmap.items() # keep existing order + if None not in nsmap: + return sorted(nsmap.items()) + + # Move the default namespace to the end. This makes sure libxml2 + # prefers a prefix if the ns is defined redundantly on the same + # element. That way, users can work around a problem themselves + # where default namespace attributes on non-default namespaced + # elements serialise without prefix (i.e. into the non-default + # namespace). + default_ns = nsmap[None] + nsdefs = [(k, v) for k, v in nsmap.items() if k is not None] + nsdefs.sort() + nsdefs.append((None, default_ns)) + return nsdefs + + +cdef _iter_attrib(attrib): + """ + Create a reproducibly ordered iterable from an attrib mapping. + Tries to preserve an existing order and sorts if it assumes no order. + """ + # dicts are insertion-ordered in Py3.6+ => keep the user provided order. + if isinstance(attrib, (dict, _Attrib, OrderedDict)): + return attrib.items() + # assume it's an unordered mapping of some kind + return sorted(attrib.items()) + + +cdef _initNodeAttributes(xmlNode* c_node, _Document doc, attrib, dict extra): + """Initialise the attributes of an element node. + """ + cdef bint is_html + cdef xmlNs* c_ns + if attrib is not None and not hasattr(attrib, 'items'): + raise TypeError, f"Invalid attribute dictionary: {python._fqtypename(attrib).decode('utf8')}" + if not attrib and not extra: + return # nothing to do + is_html = doc._parser._for_html + seen = set() + if extra: + for name, value in extra.items(): + _addAttributeToNode(c_node, doc, is_html, name, value, seen) + if attrib: + for name, value in _iter_attrib(attrib): + _addAttributeToNode(c_node, doc, is_html, name, value, seen) + + +cdef int _addAttributeToNode(xmlNode* c_node, _Document doc, bint is_html, + name, value, set seen_tags) except -1: + ns_utf, name_utf = tag = _getNsTag(name) + if tag in seen_tags: + return 0 + seen_tags.add(tag) + if not is_html: + _attributeValidOrRaise(name_utf) + value_utf = _utf8(value) + if ns_utf is None: + tree.xmlNewProp(c_node, _xcstr(name_utf), _xcstr(value_utf)) + else: + _uriValidOrRaise(ns_utf) + c_ns = doc._findOrBuildNodeNs(c_node, _xcstr(ns_utf), NULL, 1) + tree.xmlNewNsProp(c_node, c_ns, + _xcstr(name_utf), _xcstr(value_utf)) + return 0 + + +ctypedef struct _ns_node_ref: + xmlNs* ns + xmlNode* node + + +cdef int _collectNsDefs(xmlNode* c_element, _ns_node_ref **_c_ns_list, + size_t *_c_ns_list_len, size_t *_c_ns_list_size) except -1: + c_ns_list = _c_ns_list[0] + cdef size_t c_ns_list_len = _c_ns_list_len[0] + cdef size_t c_ns_list_size = _c_ns_list_size[0] + + c_nsdef = c_element.nsDef + while c_nsdef is not NULL: + if c_ns_list_len >= c_ns_list_size: + if c_ns_list is NULL: + c_ns_list_size = 20 + else: + c_ns_list_size *= 2 + c_nsref_ptr = <_ns_node_ref*> python.lxml_realloc( + c_ns_list, c_ns_list_size, sizeof(_ns_node_ref)) + if c_nsref_ptr is NULL: + if c_ns_list is not NULL: + python.lxml_free(c_ns_list) + _c_ns_list[0] = NULL + raise MemoryError() + c_ns_list = c_nsref_ptr + + c_ns_list[c_ns_list_len] = _ns_node_ref(c_nsdef, c_element) + c_ns_list_len += 1 + c_nsdef = c_nsdef.next + + _c_ns_list_size[0] = c_ns_list_size + _c_ns_list_len[0] = c_ns_list_len + _c_ns_list[0] = c_ns_list + + +cdef int _removeUnusedNamespaceDeclarations(xmlNode* c_element, set prefixes_to_keep) except -1: + """Remove any namespace declarations from a subtree that are not used by + any of its elements (or attributes). + + If a 'prefixes_to_keep' is provided, it must be a set of prefixes. + Any corresponding namespace mappings will not be removed as part of the cleanup. + """ + cdef xmlNode* c_node + cdef _ns_node_ref* c_ns_list = NULL + cdef size_t c_ns_list_size = 0 + cdef size_t c_ns_list_len = 0 + cdef size_t i + + if c_element.parent and c_element.parent.type == tree.XML_DOCUMENT_NODE: + # include declarations on the document node + _collectNsDefs(c_element.parent, &c_ns_list, &c_ns_list_len, &c_ns_list_size) + + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_element, c_element, 1) + # collect all new namespace declarations into the ns list + if c_element.nsDef: + _collectNsDefs(c_element, &c_ns_list, &c_ns_list_len, &c_ns_list_size) + + # remove all namespace declarations from the list that are referenced + if c_ns_list_len and c_element.type == tree.XML_ELEMENT_NODE: + c_node = c_element + while c_node and c_ns_list_len: + if c_node.ns: + for i in range(c_ns_list_len): + if c_node.ns is c_ns_list[i].ns: + c_ns_list_len -= 1 + c_ns_list[i] = c_ns_list[c_ns_list_len] + #c_ns_list[c_ns_list_len] = _ns_node_ref(NULL, NULL) + break + if c_node is c_element: + # continue with attributes + c_node = <xmlNode*>c_element.properties + else: + c_node = c_node.next + tree.END_FOR_EACH_ELEMENT_FROM(c_element) + + if c_ns_list is NULL: + return 0 + + # free all namespace declarations that remained in the list, + # except for those we should keep explicitly + cdef xmlNs* c_nsdef + for i in range(c_ns_list_len): + if prefixes_to_keep is not None: + if c_ns_list[i].ns.prefix and c_ns_list[i].ns.prefix in prefixes_to_keep: + continue + c_node = c_ns_list[i].node + c_nsdef = c_node.nsDef + if c_nsdef is c_ns_list[i].ns: + c_node.nsDef = c_node.nsDef.next + else: + while c_nsdef.next is not c_ns_list[i].ns: + c_nsdef = c_nsdef.next + c_nsdef.next = c_nsdef.next.next + tree.xmlFreeNs(c_ns_list[i].ns) + + if c_ns_list is not NULL: + python.lxml_free(c_ns_list) + return 0 + +cdef xmlNs* _searchNsByHref(xmlNode* c_node, const_xmlChar* c_href, bint is_attribute) noexcept: + """Search a namespace declaration that covers a node (element or + attribute). + + For attributes, try to find a prefixed namespace declaration + instead of the default namespaces. This helps in supporting + round-trips for attributes on elements with a different namespace. + """ + cdef xmlNs* c_ns + cdef xmlNs* c_default_ns = NULL + cdef xmlNode* c_element + if c_href is NULL or c_node is NULL or c_node.type == tree.XML_ENTITY_REF_NODE: + return NULL + if tree.xmlStrcmp(c_href, tree.XML_XML_NAMESPACE) == 0: + # no special cases here, let libxml2 handle this + return tree.xmlSearchNsByHref(c_node.doc, c_node, c_href) + if c_node.type == tree.XML_ATTRIBUTE_NODE: + is_attribute = 1 + while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE: + c_node = c_node.parent + c_element = c_node + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE: + c_ns = c_node.nsDef + while c_ns is not NULL: + if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0: + if c_ns.prefix is NULL and is_attribute: + # for attributes, continue searching a named + # prefix, but keep the first default namespace + # declaration that we found + if c_default_ns is NULL: + c_default_ns = c_ns + elif tree.xmlSearchNs( + c_element.doc, c_element, c_ns.prefix) is c_ns: + # start node is in namespace scope => found! + return c_ns + c_ns = c_ns.next + if c_node is not c_element and c_node.ns is not NULL: + # optimise: the node may have the namespace itself + c_ns = c_node.ns + if c_ns.href is not NULL and tree.xmlStrcmp(c_href, c_ns.href) == 0: + if c_ns.prefix is NULL and is_attribute: + # for attributes, continue searching a named + # prefix, but keep the first default namespace + # declaration that we found + if c_default_ns is NULL: + c_default_ns = c_ns + elif tree.xmlSearchNs( + c_element.doc, c_element, c_ns.prefix) is c_ns: + # start node is in namespace scope => found! + return c_ns + c_node = c_node.parent + # nothing found => use a matching default namespace or fail + if c_default_ns is not NULL: + if tree.xmlSearchNs(c_element.doc, c_element, NULL) is c_default_ns: + return c_default_ns + return NULL + +cdef int _replaceNodeByChildren(_Document doc, xmlNode* c_node) except -1: + # NOTE: this does not deallocate the node, just unlink it! + cdef xmlNode* c_parent + cdef xmlNode* c_child + if c_node.children is NULL: + tree.xmlUnlinkNode(c_node) + return 0 + + c_parent = c_node.parent + # fix parent links of children + c_child = c_node.children + while c_child is not NULL: + c_child.parent = c_parent + c_child = c_child.next + + # fix namespace references of children if their parent's namespace + # declarations get lost + if c_node.nsDef is not NULL: + c_child = c_node.children + while c_child is not NULL: + moveNodeToDocument(doc, doc._c_doc, c_child) + c_child = c_child.next + + # fix sibling links to/from child slice + if c_node.prev is NULL: + c_parent.children = c_node.children + else: + c_node.prev.next = c_node.children + c_node.children.prev = c_node.prev + if c_node.next is NULL: + c_parent.last = c_node.last + else: + c_node.next.prev = c_node.last + c_node.last.next = c_node.next + + # unlink c_node + c_node.children = c_node.last = NULL + c_node.parent = c_node.next = c_node.prev = NULL + return 0 + +cdef unicode _attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): + c_href = _getNs(<xmlNode*>c_attrib_node) + value = tree.xmlGetNsProp(c_element, c_attrib_node.name, c_href) + try: + result = funicode(value) + finally: + tree.xmlFree(value) + return result + +cdef unicode _attributeValueFromNsName(xmlNode* c_element, + const_xmlChar* c_href, const_xmlChar* c_name): + c_result = tree.xmlGetNsProp(c_element, c_name, c_href) + if c_result is NULL: + return None + try: + result = funicode(c_result) + finally: + tree.xmlFree(c_result) + return result + +cdef object _getNodeAttributeValue(xmlNode* c_node, key, default): + ns, tag = _getNsTag(key) + c_href = <const_xmlChar*>NULL if ns is None else _xcstr(ns) + c_result = tree.xmlGetNsProp(c_node, _xcstr(tag), c_href) + if c_result is NULL: + # XXX free namespace that is not in use..? + return default + try: + result = funicode(c_result) + finally: + tree.xmlFree(c_result) + return result + +cdef inline object _getAttributeValue(_Element element, key, default): + return _getNodeAttributeValue(element._c_node, key, default) + +cdef int _setAttributeValue(_Element element, key, value) except -1: + cdef const_xmlChar* c_value + cdef xmlNs* c_ns + ns, tag = _getNsTag(key) + is_html = element._doc._parser._for_html + if not is_html: + _attributeValidOrRaise(tag) + c_tag = _xcstr(tag) + if value is None and is_html: + c_value = NULL + else: + if isinstance(value, QName): + value = _resolveQNameText(element, value) + else: + value = _utf8(value) + c_value = _xcstr(value) + if ns is None: + c_ns = NULL + else: + c_ns = element._doc._findOrBuildNodeNs(element._c_node, _xcstr(ns), NULL, 1) + tree.xmlSetNsProp(element._c_node, c_ns, c_tag, c_value) + return 0 + +cdef int _delAttribute(_Element element, key) except -1: + ns, tag = _getNsTag(key) + c_href = <const_xmlChar*>NULL if ns is None else _xcstr(ns) + if _delAttributeFromNsName(element._c_node, c_href, _xcstr(tag)): + raise KeyError, key + return 0 + +cdef int _delAttributeFromNsName(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name) noexcept: + c_attr = tree.xmlHasNsProp(c_node, c_name, c_href) + if c_attr is NULL: + # XXX free namespace that is not in use..? + return -1 + tree.xmlRemoveProp(c_attr) + return 0 + +cdef list _collectAttributes(xmlNode* c_node, int collecttype): + """Collect all attributes of a node in a list. Depending on collecttype, + it collects either the name (1), the value (2) or the name-value tuples. + """ + cdef Py_ssize_t count + c_attr = c_node.properties + count = 0 + while c_attr is not NULL: + if c_attr.type == tree.XML_ATTRIBUTE_NODE: + count += 1 + c_attr = c_attr.next + + if not count: + return [] + + attributes = [None] * count + c_attr = c_node.properties + count = 0 + while c_attr is not NULL: + if c_attr.type == tree.XML_ATTRIBUTE_NODE: + if collecttype == 1: + item = _namespacedName(<xmlNode*>c_attr) + elif collecttype == 2: + item = _attributeValue(c_node, c_attr) + else: + item = (_namespacedName(<xmlNode*>c_attr), + _attributeValue(c_node, c_attr)) + attributes[count] = item + count += 1 + c_attr = c_attr.next + return attributes + +cdef object __RE_XML_ENCODING = re.compile( + r'^(<\?xml[^>]+)\s+encoding\s*=\s*["\'][^"\']*["\'](\s*\?>|)', re.U) + +cdef object __REPLACE_XML_ENCODING = __RE_XML_ENCODING.sub +cdef object __HAS_XML_ENCODING = __RE_XML_ENCODING.match + +cdef object _stripEncodingDeclaration(object xml_string): + # this is a hack to remove the XML encoding declaration from unicode + return __REPLACE_XML_ENCODING(r'\g<1>\g<2>', xml_string) + +cdef bint _hasEncodingDeclaration(object xml_string) except -1: + # check if a (unicode) string has an XML encoding declaration + return __HAS_XML_ENCODING(xml_string) is not None + +cdef inline bint _hasText(xmlNode* c_node) noexcept: + return c_node is not NULL and _textNodeOrSkip(c_node.children) is not NULL + +cdef inline bint _hasTail(xmlNode* c_node) noexcept: + return c_node is not NULL and _textNodeOrSkip(c_node.next) is not NULL + +cdef inline bint _hasNonWhitespaceTail(xmlNode* c_node) except -1: + return _hasNonWhitespaceText(c_node, tail=True) + +cdef bint _hasNonWhitespaceText(xmlNode* c_node, bint tail=False) except -1: + c_text_node = c_node and _textNodeOrSkip(c_node.next if tail else c_node.children) + if c_text_node is NULL: + return False + while c_text_node is not NULL: + if c_text_node.content[0] != c'\0' and not _collectText(c_text_node).isspace(): + return True + c_text_node = _textNodeOrSkip(c_text_node.next) + return False + +cdef unicode _collectText(xmlNode* c_node): + """Collect all text nodes and return them as a unicode string. + + Start collecting at c_node. + + If there was no text to collect, return None + """ + cdef Py_ssize_t scount + cdef xmlChar* c_text + cdef xmlNode* c_node_cur + # check for multiple text nodes + scount = 0 + c_text = NULL + c_node_cur = c_node = _textNodeOrSkip(c_node) + while c_node_cur is not NULL: + if c_node_cur.content[0] != c'\0': + c_text = c_node_cur.content + scount += 1 + c_node_cur = _textNodeOrSkip(c_node_cur.next) + + # handle two most common cases first + if c_text is NULL: + return '' if scount > 0 else None + if scount == 1: + return funicode(c_text) + + # the rest is not performance critical anymore + result = b'' + while c_node is not NULL: + result += <unsigned char*>c_node.content + c_node = _textNodeOrSkip(c_node.next) + return funicode(<const_xmlChar*><unsigned char*>result) + +cdef void _removeText(xmlNode* c_node) noexcept: + """Remove all text nodes. + + Start removing at c_node. + """ + cdef xmlNode* c_next + c_node = _textNodeOrSkip(c_node) + while c_node is not NULL: + c_next = _textNodeOrSkip(c_node.next) + tree.xmlUnlinkNode(c_node) + tree.xmlFreeNode(c_node) + c_node = c_next + +cdef xmlNode* _createTextNode(xmlDoc* doc, value) except NULL: + cdef xmlNode* c_text_node + if isinstance(value, CDATA): + c_text_node = tree.xmlNewCDataBlock( + doc, _xcstr((<CDATA>value)._utf8_data), + python.PyBytes_GET_SIZE((<CDATA>value)._utf8_data)) + else: + text = _utf8(value) + c_text_node = tree.xmlNewDocText(doc, _xcstr(text)) + if not c_text_node: + raise MemoryError() + return c_text_node + +cdef int _setNodeText(xmlNode* c_node, value) except -1: + # remove all text nodes at the start first + _removeText(c_node.children) + if value is None: + return 0 + # now add new text node with value at start + c_text_node = _createTextNode(c_node.doc, value) + if c_node.children is NULL: + tree.xmlAddChild(c_node, c_text_node) + else: + tree.xmlAddPrevSibling(c_node.children, c_text_node) + return 0 + +cdef int _setTailText(xmlNode* c_node, value) except -1: + # remove all text nodes at the start first + _removeText(c_node.next) + if value is None: + return 0 + # now append new text node with value + c_text_node = _createTextNode(c_node.doc, value) + tree.xmlAddNextSibling(c_node, c_text_node) + return 0 + +cdef bytes _resolveQNameText(_Element element, value): + cdef xmlNs* c_ns + ns, tag = _getNsTag(value) + if ns is None: + return tag + else: + c_ns = element._doc._findOrBuildNodeNs( + element._c_node, _xcstr(ns), NULL, 0) + return python.PyBytes_FromFormat('%s:%s', c_ns.prefix, _cstr(tag)) + +cdef inline bint _hasChild(xmlNode* c_node) noexcept: + return c_node is not NULL and _findChildForwards(c_node, 0) is not NULL + +cdef inline Py_ssize_t _countElements(xmlNode* c_node) noexcept: + "Counts the elements within the following siblings and the node itself." + cdef Py_ssize_t count + count = 0 + while c_node is not NULL: + if _isElement(c_node): + count += 1 + c_node = c_node.next + return count + +cdef int _findChildSlice( + slice sliceobject, xmlNode* c_parent, + xmlNode** c_start_node, Py_ssize_t* c_step, Py_ssize_t* c_length) except -1: + """Resolve a children slice. + + Returns the start node, step size and the slice length in the + pointer arguments. + """ + cdef Py_ssize_t start = 0, stop = 0, childcount + childcount = _countElements(c_parent.children) + if childcount == 0: + c_start_node[0] = NULL + c_length[0] = 0 + if sliceobject.step is None: + c_step[0] = 1 + else: + python._PyEval_SliceIndex(sliceobject.step, c_step) + return 0 + python.PySlice_GetIndicesEx( + sliceobject, childcount, &start, &stop, c_step, c_length) + if start > childcount // 2: + c_start_node[0] = _findChildBackwards(c_parent, childcount - start - 1) + else: + c_start_node[0] = _findChild(c_parent, start) + return 0 + +cdef bint _isFullSlice(slice sliceobject) except -1: + """Conservative guess if this slice is a full slice as in ``s[:]``. + """ + cdef Py_ssize_t step = 0 + if sliceobject is None: + return 0 + if sliceobject.start is None and \ + sliceobject.stop is None: + if sliceobject.step is None: + return 1 + python._PyEval_SliceIndex(sliceobject.step, &step) + if step == 1: + return 1 + return 0 + return 0 + +cdef _collectChildren(_Element element): + cdef xmlNode* c_node + cdef list result = [] + c_node = element._c_node.children + if c_node is not NULL: + if not _isElement(c_node): + c_node = _nextElement(c_node) + while c_node is not NULL: + result.append(_elementFactory(element._doc, c_node)) + c_node = _nextElement(c_node) + return result + +cdef inline xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index) noexcept: + if index < 0: + return _findChildBackwards(c_node, -index - 1) + else: + return _findChildForwards(c_node, index) + +cdef inline xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index) noexcept: + """Return child element of c_node with index, or return NULL if not found. + """ + cdef xmlNode* c_child + cdef Py_ssize_t c + c_child = c_node.children + c = 0 + while c_child is not NULL: + if _isElement(c_child): + if c == index: + return c_child + c += 1 + c_child = c_child.next + return NULL + +cdef inline xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index) noexcept: + """Return child element of c_node with index, or return NULL if not found. + Search from the end. + """ + cdef xmlNode* c_child + cdef Py_ssize_t c + c_child = c_node.last + c = 0 + while c_child is not NULL: + if _isElement(c_child): + if c == index: + return c_child + c += 1 + c_child = c_child.prev + return NULL + +cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node) noexcept nogil: + """Return the node if it's a text node. Skip over ignorable nodes in a + series of text nodes. Return NULL if a non-ignorable node is found. + + This is used to skip over XInclude nodes when collecting adjacent text + nodes. + """ + while c_node is not NULL: + if c_node.type == tree.XML_TEXT_NODE or \ + c_node.type == tree.XML_CDATA_SECTION_NODE: + return c_node + elif c_node.type == tree.XML_XINCLUDE_START or \ + c_node.type == tree.XML_XINCLUDE_END: + c_node = c_node.next + else: + return NULL + return NULL + +cdef inline xmlNode* _nextElement(xmlNode* c_node) noexcept: + """Given a node, find the next sibling that is an element. + """ + if c_node is NULL: + return NULL + c_node = c_node.next + while c_node is not NULL: + if _isElement(c_node): + return c_node + c_node = c_node.next + return NULL + +cdef inline xmlNode* _previousElement(xmlNode* c_node) noexcept: + """Given a node, find the next sibling that is an element. + """ + if c_node is NULL: + return NULL + c_node = c_node.prev + while c_node is not NULL: + if _isElement(c_node): + return c_node + c_node = c_node.prev + return NULL + +cdef inline xmlNode* _parentElement(xmlNode* c_node) noexcept: + "Given a node, find the parent element." + if c_node is NULL or not _isElement(c_node): + return NULL + c_node = c_node.parent + if c_node is NULL or not _isElement(c_node): + return NULL + return c_node + +cdef inline bint _tagMatches(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name) noexcept: + """Tests if the node matches namespace URI and tag name. + + A node matches if it matches both c_href and c_name. + + A node matches c_href if any of the following is true: + * c_href is NULL + * its namespace is NULL and c_href is the empty string + * its namespace string equals the c_href string + + A node matches c_name if any of the following is true: + * c_name is NULL + * its name string equals the c_name string + """ + if c_node is NULL: + return 0 + if c_node.type != tree.XML_ELEMENT_NODE: + # not an element, only succeed if we match everything + return c_name is NULL and c_href is NULL + if c_name is NULL: + if c_href is NULL: + # always match + return 1 + else: + c_node_href = _getNs(c_node) + if c_node_href is NULL: + return c_href[0] == c'\0' + else: + return tree.xmlStrcmp(c_node_href, c_href) == 0 + elif c_href is NULL: + if _getNs(c_node) is not NULL: + return 0 + return c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0 + elif c_node.name == c_name or tree.xmlStrcmp(c_node.name, c_name) == 0: + c_node_href = _getNs(c_node) + if c_node_href is NULL: + return c_href[0] == c'\0' + else: + return tree.xmlStrcmp(c_node_href, c_href) == 0 + else: + return 0 + +cdef inline bint _tagMatchesExactly(xmlNode* c_node, qname* c_qname) noexcept: + """Tests if the node matches namespace URI and tag name. + + This differs from _tagMatches() in that it does not consider a + NULL value in qname.href a wildcard, and that it expects the c_name + to be taken from the doc dict, i.e. it only compares the names by + address. + + A node matches if it matches both href and c_name of the qname. + + A node matches c_href if any of the following is true: + * its namespace is NULL and c_href is the empty string + * its namespace string equals the c_href string + + A node matches c_name if any of the following is true: + * c_name is NULL + * its name string points to the same address (!) as c_name + """ + return _nsTagMatchesExactly(_getNs(c_node), c_node.name, c_qname) + +cdef inline bint _nsTagMatchesExactly(const_xmlChar* c_node_href, + const_xmlChar* c_node_name, + qname* c_qname) noexcept: + """Tests if name and namespace URI match those of c_qname. + + This differs from _tagMatches() in that it does not consider a + NULL value in qname.href a wildcard, and that it expects the c_name + to be taken from the doc dict, i.e. it only compares the names by + address. + + A node matches if it matches both href and c_name of the qname. + + A node matches c_href if any of the following is true: + * its namespace is NULL and c_href is the empty string + * its namespace string equals the c_href string + + A node matches c_name if any of the following is true: + * c_name is NULL + * its name string points to the same address (!) as c_name + """ + cdef char* c_href + if c_qname.c_name is not NULL and c_qname.c_name is not c_node_name: + return 0 + if c_qname.href is NULL: + return 1 + c_href = python.__cstr(c_qname.href) + if c_href[0] == b'\0': + return c_node_href is NULL or c_node_href[0] == b'\0' + elif c_node_href is NULL: + return 0 + else: + return tree.xmlStrcmp(<const_xmlChar*>c_href, c_node_href) == 0 + +cdef Py_ssize_t _mapTagsToQnameMatchArray(xmlDoc* c_doc, list ns_tags, + qname* c_ns_tags, bint force_into_dict) except -1: + """Map a sequence of (name, namespace) pairs to a qname array for efficient + matching with _tagMatchesExactly() above. + + Note that each qname struct in the array owns its href byte string object + if it is not NULL. + """ + cdef Py_ssize_t count = 0, i + cdef bytes ns, tag + for ns, tag in ns_tags: + if tag is None: + c_tag = <const_xmlChar*>NULL + elif force_into_dict: + c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), len(tag)) + if c_tag is NULL: + # clean up before raising the error + for i in xrange(count): + cpython.ref.Py_XDECREF(c_ns_tags[i].href) + raise MemoryError() + else: + c_tag = tree.xmlDictExists(c_doc.dict, _xcstr(tag), len(tag)) + if c_tag is NULL: + # not in the dict => not in the document + continue + c_ns_tags[count].c_name = c_tag + if ns is None: + c_ns_tags[count].href = NULL + else: + cpython.ref.Py_INCREF(ns) # keep an owned reference! + c_ns_tags[count].href = <python.PyObject*>ns + count += 1 + return count + +cdef int _removeNode(_Document doc, xmlNode* c_node) except -1: + """Unlink and free a node and subnodes if possible. Otherwise, make sure + it's self-contained. + """ + cdef xmlNode* c_next + c_next = c_node.next + tree.xmlUnlinkNode(c_node) + _moveTail(c_next, c_node) + if not attemptDeallocation(c_node): + # make namespaces absolute + moveNodeToDocument(doc, c_node.doc, c_node) + return 0 + +cdef int _removeSiblings(xmlNode* c_element, tree.xmlElementType node_type, bint with_tail) except -1: + cdef xmlNode* c_node + cdef xmlNode* c_next + c_node = c_element.next + while c_node is not NULL: + c_next = _nextElement(c_node) + if c_node.type == node_type: + if with_tail: + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) + attemptDeallocation(c_node) + c_node = c_next + c_node = c_element.prev + while c_node is not NULL: + c_next = _previousElement(c_node) + if c_node.type == node_type: + if with_tail: + _removeText(c_node.next) + tree.xmlUnlinkNode(c_node) + attemptDeallocation(c_node) + c_node = c_next + return 0 + +cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target) noexcept: + cdef xmlNode* c_next + # tail support: look for any text nodes trailing this node and + # move them too + c_tail = _textNodeOrSkip(c_tail) + while c_tail is not NULL: + c_next = _textNodeOrSkip(c_tail.next) + c_target = tree.xmlAddNextSibling(c_target, c_tail) + c_tail = c_next + +cdef int _copyTail(xmlNode* c_tail, xmlNode* c_target) except -1: + cdef xmlNode* c_new_tail + # tail copying support: look for any text nodes trailing this node and + # copy it to the target node + c_tail = _textNodeOrSkip(c_tail) + while c_tail is not NULL: + if c_target.doc is not c_tail.doc: + c_new_tail = tree.xmlDocCopyNode(c_tail, c_target.doc, 0) + else: + c_new_tail = tree.xmlCopyNode(c_tail, 0) + if c_new_tail is NULL: + raise MemoryError() + c_target = tree.xmlAddNextSibling(c_target, c_new_tail) + c_tail = _textNodeOrSkip(c_tail.next) + return 0 + +cdef int _copyNonElementSiblings(xmlNode* c_node, xmlNode* c_target) except -1: + cdef xmlNode* c_copy + cdef xmlNode* c_sibling = c_node + while c_sibling.prev != NULL and \ + (c_sibling.prev.type == tree.XML_PI_NODE or + c_sibling.prev.type == tree.XML_COMMENT_NODE or + c_sibling.prev.type == tree.XML_DTD_NODE): + c_sibling = c_sibling.prev + while c_sibling != c_node: + if c_sibling.type == tree.XML_DTD_NODE: + c_copy = <xmlNode*>_copyDtd(<tree.xmlDtd*>c_sibling) + if c_sibling == <xmlNode*>c_node.doc.intSubset: + c_target.doc.intSubset = <tree.xmlDtd*>c_copy + else: # c_sibling == c_node.doc.extSubset + c_target.doc.extSubset = <tree.xmlDtd*>c_copy + else: + c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1) + if c_copy is NULL: + raise MemoryError() + tree.xmlAddPrevSibling(c_target, c_copy) + c_sibling = c_sibling.next + while c_sibling.next != NULL and \ + (c_sibling.next.type == tree.XML_PI_NODE or + c_sibling.next.type == tree.XML_COMMENT_NODE): + c_sibling = c_sibling.next + c_copy = tree.xmlDocCopyNode(c_sibling, c_target.doc, 1) + if c_copy is NULL: + raise MemoryError() + tree.xmlAddNextSibling(c_target, c_copy) + +cdef int _deleteSlice(_Document doc, xmlNode* c_node, + Py_ssize_t count, Py_ssize_t step) except -1: + """Delete slice, ``count`` items starting with ``c_node`` with a step + width of ``step``. + """ + cdef xmlNode* c_next + cdef Py_ssize_t c, i + cdef _node_to_node_function next_element + if c_node is NULL: + return 0 + if step > 0: + next_element = _nextElement + else: + step = -step + next_element = _previousElement + # now start deleting nodes + c = 0 + c_next = c_node + while c_node is not NULL and c < count: + for i in range(step): + c_next = next_element(c_next) + if c_next is NULL: + break + _removeNode(doc, c_node) + c += 1 + c_node = c_next + return 0 + +cdef int _replaceSlice(_Element parent, xmlNode* c_node, + Py_ssize_t slicelength, Py_ssize_t step, + bint left_to_right, elements) except -1: + """Replace the slice of ``count`` elements starting at ``c_node`` with + positive step width ``step`` by the Elements in ``elements``. The + direction is given by the boolean argument ``left_to_right``. + + ``c_node`` may be NULL to indicate the end of the children list. + """ + cdef xmlNode* c_orig_neighbour + cdef xmlNode* c_next + cdef xmlDoc* c_source_doc + cdef _Element element + cdef Py_ssize_t seqlength, i, c + cdef _node_to_node_function next_element + assert step > 0 + if left_to_right: + next_element = _nextElement + else: + next_element = _previousElement + + if not isinstance(elements, (list, tuple)): + elements = list(elements) + + if step != 1 or not left_to_right: + # *replacing* children stepwise with list => check size! + seqlength = len(elements) + if seqlength != slicelength: + raise ValueError, f"attempt to assign sequence of size {seqlength} " \ + f"to extended slice of size {slicelength}" + + if c_node is NULL: + # no children yet => add all elements straight away + if left_to_right: + for element in elements: + assert element is not None, "Node must not be None" + _appendChild(parent, element) + else: + for element in elements: + assert element is not None, "Node must not be None" + _prependChild(parent, element) + return 0 + + # remove the elements first as some might be re-added + if left_to_right: + # L->R, remember left neighbour + c_orig_neighbour = _previousElement(c_node) + else: + # R->L, remember right neighbour + c_orig_neighbour = _nextElement(c_node) + + # We remove the original slice elements one by one. Since we hold + # a Python reference to all elements that we will insert, it is + # safe to let _removeNode() try (and fail) to free them even if + # the element itself or one of its descendents will be reinserted. + c = 0 + c_next = c_node + while c_node is not NULL and c < slicelength: + for i in range(step): + c_next = next_element(c_next) + if c_next is NULL: + break + _removeNode(parent._doc, c_node) + c += 1 + c_node = c_next + + # make sure each element is inserted only once + elements = iter(elements) + + # find the first node right of the new insertion point + if left_to_right: + if c_orig_neighbour is not NULL: + c_node = next_element(c_orig_neighbour) + else: + # before the first element + c_node = _findChildForwards(parent._c_node, 0) + elif c_orig_neighbour is NULL: + # at the end, but reversed stepping + # append one element and go to the next insertion point + for element in elements: + assert element is not None, "Node must not be None" + _appendChild(parent, element) + c_node = element._c_node + if slicelength > 0: + slicelength -= 1 + for i in range(1, step): + c_node = next_element(c_node) + if c_node is NULL: + break + break + else: + c_node = c_orig_neighbour + + if left_to_right: + # adjust step size after removing slice as we are not stepping + # over the newly inserted elements + step -= 1 + + # now insert elements where we removed them + if c_node is not NULL: + for element in elements: + assert element is not None, "Node must not be None" + _assertValidNode(element) + # move element and tail over + c_source_doc = element._c_node.doc + c_next = element._c_node.next + tree.xmlAddPrevSibling(c_node, element._c_node) + _moveTail(c_next, element._c_node) + + # integrate element into new document + moveNodeToDocument(parent._doc, c_source_doc, element._c_node) + + # stop at the end of the slice + if slicelength > 0: + slicelength -= 1 + for i in range(step): + c_node = next_element(c_node) + if c_node is NULL: + break + if c_node is NULL: + break + else: + # everything inserted + return 0 + + # append the remaining elements at the respective end + if left_to_right: + for element in elements: + assert element is not None, "Node must not be None" + _assertValidNode(element) + _appendChild(parent, element) + else: + for element in elements: + assert element is not None, "Node must not be None" + _assertValidNode(element) + _prependChild(parent, element) + + return 0 + + +cdef int _linkChild(xmlNode* c_parent, xmlNode* c_node) except -1: + """Adaptation of 'xmlAddChild()' that deep-fix the document links iteratively. + """ + assert _isElement(c_node) + c_node.parent = c_parent + if c_parent.children is NULL: + c_parent.children = c_parent.last = c_node + else: + c_node.prev = c_parent.last + c_parent.last.next = c_node + c_parent.last = c_node + + _setTreeDoc(c_node, c_parent.doc) + return 0 + + +cdef int _appendChild(_Element parent, _Element child) except -1: + """Append a new child to a parent element. + """ + c_node = child._c_node + c_source_doc = c_node.doc + # prevent cycles + if _isAncestorOrSame(c_node, parent._c_node): + raise ValueError("cannot append parent to itself") + # store possible text node + c_next = c_node.next + # move node itself + tree.xmlUnlinkNode(c_node) + # do not call xmlAddChild() here since it would deep-traverse the tree + _linkChild(parent._c_node, c_node) + _moveTail(c_next, c_node) + # uh oh, elements may be pointing to different doc when + # parent element has moved; change them too.. + moveNodeToDocument(parent._doc, c_source_doc, c_node) + return 0 + +cdef int _prependChild(_Element parent, _Element child) except -1: + """Prepend a new child to a parent element. + """ + c_node = child._c_node + c_source_doc = c_node.doc + # prevent cycles + if _isAncestorOrSame(c_node, parent._c_node): + raise ValueError("cannot append parent to itself") + # store possible text node + c_next = c_node.next + # move node itself + c_child = _findChildForwards(parent._c_node, 0) + if c_child is NULL: + tree.xmlUnlinkNode(c_node) + # do not call xmlAddChild() here since it would deep-traverse the tree + _linkChild(parent._c_node, c_node) + else: + tree.xmlAddPrevSibling(c_child, c_node) + _moveTail(c_next, c_node) + # uh oh, elements may be pointing to different doc when + # parent element has moved; change them too.. + moveNodeToDocument(parent._doc, c_source_doc, c_node) + return 0 + +cdef int _appendSibling(_Element element, _Element sibling) except -1: + """Add a new sibling behind an element. + """ + return _addSibling(element, sibling, as_next=True) + +cdef int _prependSibling(_Element element, _Element sibling) except -1: + """Add a new sibling before an element. + """ + return _addSibling(element, sibling, as_next=False) + +cdef int _addSibling(_Element element, _Element sibling, bint as_next) except -1: + c_node = sibling._c_node + c_source_doc = c_node.doc + # prevent cycles + if _isAncestorOrSame(c_node, element._c_node): + if element._c_node is c_node: + return 0 # nothing to do + raise ValueError("cannot add ancestor as sibling, please break cycle first") + # store possible text node + c_next = c_node.next + # move node itself + if as_next: + # must insert after any tail text + c_next_node = _nextElement(element._c_node) + if c_next_node is NULL: + c_next_node = element._c_node + while c_next_node.next: + c_next_node = c_next_node.next + tree.xmlAddNextSibling(c_next_node, c_node) + else: + tree.xmlAddPrevSibling(c_next_node, c_node) + else: + tree.xmlAddPrevSibling(element._c_node, c_node) + _moveTail(c_next, c_node) + # uh oh, elements may be pointing to different doc when + # parent element has moved; change them too.. + moveNodeToDocument(element._doc, c_source_doc, c_node) + return 0 + +cdef inline bint isutf8(const_xmlChar* s) noexcept: + cdef xmlChar c = s[0] + while c != c'\0': + if c & 0x80: + return True + s += 1 + c = s[0] + return False + +cdef bint isutf8l(const_xmlChar* s, size_t length) noexcept: + """ + Search for non-ASCII characters in the string, knowing its length in advance. + """ + cdef unsigned int i + cdef unsigned long non_ascii_mask + cdef const unsigned long *lptr = <const unsigned long*> s + + cdef const unsigned long *end = lptr + length // sizeof(unsigned long) + if length >= sizeof(non_ascii_mask): + # Build constant 0x80808080... mask (and let the C compiler fold it). + non_ascii_mask = 0 + for i in range(sizeof(non_ascii_mask) // 2): + non_ascii_mask = (non_ascii_mask << 16) | 0x8080 + + # Advance to long-aligned character before we start reading longs. + while (<size_t>s) % sizeof(unsigned long) and s < <const_xmlChar *>end: + if s[0] & 0x80: + return True + s += 1 + + # Read one long at a time + lptr = <const unsigned long*> s + while lptr < end: + if lptr[0] & non_ascii_mask: + return True + lptr += 1 + s = <const_xmlChar *>lptr + + while s < (<const_xmlChar *>end + length % sizeof(unsigned long)): + if s[0] & 0x80: + return True + s += 1 + + return False + +cdef int _is_valid_xml_ascii(bytes pystring) except -1: + """Check if a string is XML ascii content.""" + cdef signed char ch + # When ch is a *signed* char, non-ascii characters are negative integers + # and xmlIsChar_ch does not accept them. + for ch in pystring: + if not tree.xmlIsChar_ch(ch): + return 0 + return 1 + +cdef bint _is_valid_xml_utf8(bytes pystring) except -1: + """Check if a string is like valid UTF-8 XML content.""" + cdef const_xmlChar* s = _xcstr(pystring) + cdef const_xmlChar* c_end = s + len(pystring) + cdef unsigned long next3 = 0 + if s < c_end - 2: + next3 = (s[0] << 8) | (s[1]) + + while s < c_end - 2: + next3 = 0x00ffffff & ((next3 << 8) | s[2]) + if s[0] & 0x80: + # 0xefbfbe and 0xefbfbf are utf-8 encodings of + # forbidden characters \ufffe and \uffff + if next3 == 0x00efbfbe or next3 == 0x00efbfbf: + return 0 + # 0xeda080 and 0xedbfbf are utf-8 encodings of + # \ud800 and \udfff. Anything between them (inclusive) + # is forbidden, because they are surrogate blocks in utf-16. + if 0x00eda080 <= next3 <= 0x00edbfbf: + return 0 + elif not tree.xmlIsChar_ch(s[0]): + return 0 # invalid ascii char + s += 1 + + while s < c_end: + if not s[0] & 0x80 and not tree.xmlIsChar_ch(s[0]): + return 0 # invalid ascii char + s += 1 + + return 1 + +cdef inline unicode funicodeOrNone(const_xmlChar* s): + return funicode(s) if s is not NULL else None + +cdef inline unicode funicodeOrEmpty(const_xmlChar* s): + return funicode(s) if s is not NULL else '' + +cdef unicode funicode(const_xmlChar* s): + return s.decode('UTF-8') + +cdef bytes _utf8(object s): + """Test if a string is valid user input and encode it to UTF-8. + Reject all bytes/unicode input that contains non-XML characters. + Reject all bytes input that contains non-ASCII characters. + """ + cdef int valid + cdef bytes utf8_string + if isinstance(s, unicode): + utf8_string = (<unicode>s).encode('utf8') + valid = _is_valid_xml_utf8(utf8_string) + elif isinstance(s, (bytes, bytearray)): + utf8_string = s if type(s) is bytes else bytes(s) + valid = _is_valid_xml_ascii(utf8_string) + else: + raise TypeError("Argument must be bytes or unicode, got '%.200s'" % type(s).__name__) + if not valid: + raise ValueError( + "All strings must be XML compatible: Unicode or ASCII, no NULL bytes or control characters") + return utf8_string + + +cdef bytes _utf8orNone(object s): + return _utf8(s) if s is not None else None + + +cdef enum: + NO_FILE_PATH = 0 + ABS_UNIX_FILE_PATH = 1 + ABS_WIN_FILE_PATH = 2 + REL_FILE_PATH = 3 + + +cdef bint _isFilePath(const_xmlChar* c_path) noexcept: + "simple heuristic to see if a path is a filename" + cdef xmlChar c + # test if it looks like an absolute Unix path or a Windows network path + if c_path[0] == c'/': + return ABS_UNIX_FILE_PATH + + # test if it looks like an absolute Windows path or URL + if c'a' <= c_path[0] <= c'z' or c'A' <= c_path[0] <= c'Z': + c_path += 1 + if c_path[0] == c':' and c_path[1] in b'\0\\': + return ABS_WIN_FILE_PATH # C: or C:\... + + # test if it looks like a URL with scheme:// + while c'a' <= c_path[0] <= c'z' or c'A' <= c_path[0] <= c'Z': + c_path += 1 + if c_path[0] == c':' and c_path[1] == c'/' and c_path[2] == c'/': + return NO_FILE_PATH + + # assume it's a relative path + return REL_FILE_PATH + + +cdef object _getFSPathOrObject(object obj): + """ + Get the __fspath__ attribute of an object if it exists. + Otherwise, the original object is returned. + """ + if _isString(obj): + return obj + try: + return python.PyOS_FSPath(obj) + except TypeError: + return obj + + +cdef object _encodeFilename(object filename): + """Make sure a filename is 8-bit encoded (or None). + """ + if filename is None: + return None + elif isinstance(filename, bytes): + return filename + elif isinstance(filename, unicode): + filename8 = (<unicode>filename).encode('utf8') + if _isFilePath(<unsigned char*>filename8): + try: + return python.PyUnicode_AsEncodedString( + filename, _C_FILENAME_ENCODING, NULL) + except UnicodeEncodeError: + pass + return filename8 + else: + raise TypeError("Argument must be string or unicode.") + +cdef object _decodeFilename(const_xmlChar* c_path): + """Make the filename a unicode string if we are in Py3. + """ + return _decodeFilenameWithLength(c_path, tree.xmlStrlen(c_path)) + +cdef object _decodeFilenameWithLength(const_xmlChar* c_path, size_t c_len): + """Make the filename a unicode string if we are in Py3. + """ + if _isFilePath(c_path): + try: + return python.PyUnicode_Decode( + <const_char*>c_path, c_len, _C_FILENAME_ENCODING, NULL) + except UnicodeDecodeError: + pass + try: + return (<unsigned char*>c_path)[:c_len].decode('UTF-8') + except UnicodeDecodeError: + # this is a stupid fallback, but it might still work... + return (<unsigned char*>c_path)[:c_len].decode('latin-1', 'replace') + +cdef object _encodeFilenameUTF8(object filename): + """Recode filename as UTF-8. Tries ASCII, local filesystem encoding and + UTF-8 as source encoding. + """ + cdef char* c_filename + if filename is None: + return None + elif isinstance(filename, bytes): + if not isutf8l(<bytes>filename, len(<bytes>filename)): + # plain ASCII! + return filename + c_filename = _cstr(<bytes>filename) + try: + # try to decode with default encoding + filename = python.PyUnicode_Decode( + c_filename, len(<bytes>filename), + _C_FILENAME_ENCODING, NULL) + except UnicodeDecodeError as decode_exc: + try: + # try if it's proper UTF-8 + (<bytes>filename).decode('utf8') + return filename + except UnicodeDecodeError: + raise decode_exc # otherwise re-raise original exception + if isinstance(filename, unicode): + return (<unicode>filename).encode('utf8') + else: + raise TypeError("Argument must be string or unicode.") + +cdef tuple _getNsTag(tag): + """Given a tag, find namespace URI and tag name. + Return None for NS uri if no namespace URI provided. + """ + return __getNsTag(tag, 0) + +cdef tuple _getNsTagWithEmptyNs(tag): + """Given a tag, find namespace URI and tag name. Return None for NS uri + if no namespace URI provided, or the empty string if namespace + part is '{}'. + """ + return __getNsTag(tag, 1) + +cdef tuple __getNsTag(tag, bint empty_ns): + cdef char* c_tag + cdef char* c_ns_end + cdef Py_ssize_t taglen + cdef Py_ssize_t nslen + cdef bytes ns = None + # _isString() is much faster than isinstance() + if not _isString(tag) and isinstance(tag, QName): + tag = (<QName>tag).text + tag = _utf8(tag) + c_tag = _cstr(tag) + if c_tag[0] == c'{': + c_tag += 1 + c_ns_end = cstring_h.strchr(c_tag, c'}') + if c_ns_end is NULL: + raise ValueError, "Invalid tag name" + nslen = c_ns_end - c_tag + taglen = python.PyBytes_GET_SIZE(tag) - nslen - 2 + if taglen == 0: + raise ValueError, "Empty tag name" + if nslen > 0: + ns = <bytes>c_tag[:nslen] + elif empty_ns: + ns = b'' + tag = <bytes>c_ns_end[1:taglen+1] + elif python.PyBytes_GET_SIZE(tag) == 0: + raise ValueError, "Empty tag name" + return ns, tag + +cdef inline int _pyXmlNameIsValid(name_utf8): + return _xmlNameIsValid(_xcstr(name_utf8)) and b':' not in name_utf8 + +cdef inline int _pyHtmlNameIsValid(name_utf8): + return _htmlNameIsValid(_xcstr(name_utf8)) + +cdef inline int _xmlNameIsValid(const_xmlChar* c_name) noexcept: + return tree.xmlValidateNameValue(c_name) + +cdef int _htmlNameIsValid(const_xmlChar* c_name) noexcept: + if c_name is NULL or c_name[0] == c'\0': + return 0 + while c_name[0] != c'\0': + if c_name[0] in b'&<>/"\'\t\n\x0B\x0C\r ': + return 0 + c_name += 1 + return 1 + +cdef bint _characterReferenceIsValid(const_xmlChar* c_name) noexcept: + cdef bint is_hex + if c_name[0] == c'x': + c_name += 1 + is_hex = 1 + else: + is_hex = 0 + if c_name[0] == c'\0': + return 0 + while c_name[0] != c'\0': + if c_name[0] < c'0' or c_name[0] > c'9': + if not is_hex: + return 0 + if not (c'a' <= c_name[0] <= c'f'): + if not (c'A' <= c_name[0] <= c'F'): + return 0 + c_name += 1 + return 1 + +cdef int _tagValidOrRaise(tag_utf) except -1: + if not _pyXmlNameIsValid(tag_utf): + raise ValueError(f"Invalid tag name {(<bytes>tag_utf).decode('utf8')!r}") + return 0 + +cdef int _htmlTagValidOrRaise(tag_utf) except -1: + if not _pyHtmlNameIsValid(tag_utf): + raise ValueError(f"Invalid HTML tag name {(<bytes>tag_utf).decode('utf8')!r}") + return 0 + +cdef int _attributeValidOrRaise(name_utf) except -1: + if not _pyXmlNameIsValid(name_utf): + raise ValueError(f"Invalid attribute name {(<bytes>name_utf).decode('utf8')!r}") + return 0 + +cdef int _prefixValidOrRaise(tag_utf) except -1: + if not _pyXmlNameIsValid(tag_utf): + raise ValueError(f"Invalid namespace prefix {(<bytes>tag_utf).decode('utf8')!r}") + return 0 + +cdef int _uriValidOrRaise(uri_utf) except -1: + cdef uri.xmlURI* c_uri = uri.xmlParseURI(_cstr(uri_utf)) + if c_uri is NULL: + raise ValueError(f"Invalid namespace URI {(<bytes>uri_utf).decode('utf8')!r}") + uri.xmlFreeURI(c_uri) + return 0 + +cdef inline unicode _namespacedName(xmlNode* c_node): + return _namespacedNameFromNsName(_getNs(c_node), c_node.name) + + +cdef unicode _namespacedNameFromNsName(const_xmlChar* c_href, const_xmlChar* c_name): + name = funicode(c_name) + if c_href is NULL: + return name + href = funicode(c_href) + return f"{{{href}}}{name}" + + +cdef _getFilenameForFile(source): + """Given a Python File or Gzip object, give filename back. + + Returns None if not a file object. + """ + # urllib2 provides a geturl() method + try: + return source.geturl() + except: + pass + # file instances have a name attribute + try: + filename = source.name + if _isString(filename): + return os_path_abspath(filename) + except: + pass + # gzip file instances have a filename attribute (before Py3k) + try: + filename = source.filename + if _isString(filename): + return os_path_abspath(filename) + except: + pass + # can't determine filename + return None diff --git a/.venv/lib/python3.12/site-packages/lxml/builder.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/lxml/builder.cpython-312-x86_64-linux-gnu.so Binary files differnew file mode 100755 index 00000000..262245fe --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/builder.cpython-312-x86_64-linux-gnu.so diff --git a/.venv/lib/python3.12/site-packages/lxml/builder.py b/.venv/lib/python3.12/site-packages/lxml/builder.py new file mode 100644 index 00000000..cff67b0b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/builder.py @@ -0,0 +1,232 @@ +# cython: language_level=2 + +# +# Element generator factory by Fredrik Lundh. +# +# Source: +# http://online.effbot.org/2006_11_01_archive.htm#et-builder +# http://effbot.python-hosting.com/file/stuff/sandbox/elementlib/builder.py +# +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# +# Copyright (c) 1999-2004 by Fredrik Lundh +# +# By obtaining, using, and/or copying this software and/or its +# associated documentation, you agree that you have read, understood, +# and will comply with the following terms and conditions: +# +# Permission to use, copy, modify, and distribute this software and +# its associated documentation for any purpose and without fee is +# hereby granted, provided that the above copyright notice appears in +# all copies, and that both that copyright notice and this permission +# notice appear in supporting documentation, and that the name of +# Secret Labs AB or the author not be used in advertising or publicity +# pertaining to distribution of the software without specific, written +# prior permission. +# +# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD +# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT- +# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR +# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY +# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, +# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS +# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE +# OF THIS SOFTWARE. +# -------------------------------------------------------------------- + +""" +The ``E`` Element factory for generating XML documents. +""" + + +import lxml.etree as ET +_QName = ET.QName + +from functools import partial + +try: + basestring +except NameError: + basestring = str + +try: + unicode +except NameError: + unicode = str + + +class ElementMaker: + """Element generator factory. + + Unlike the ordinary Element factory, the E factory allows you to pass in + more than just a tag and some optional attributes; you can also pass in + text and other elements. The text is added as either text or tail + attributes, and elements are inserted at the right spot. Some small + examples:: + + >>> from lxml import etree as ET + >>> from lxml.builder import E + + >>> ET.tostring(E("tag")) + '<tag/>' + >>> ET.tostring(E("tag", "text")) + '<tag>text</tag>' + >>> ET.tostring(E("tag", "text", key="value")) + '<tag key="value">text</tag>' + >>> ET.tostring(E("tag", E("subtag", "text"), "tail")) + '<tag><subtag>text</subtag>tail</tag>' + + For simple tags, the factory also allows you to write ``E.tag(...)`` instead + of ``E('tag', ...)``:: + + >>> ET.tostring(E.tag()) + '<tag/>' + >>> ET.tostring(E.tag("text")) + '<tag>text</tag>' + >>> ET.tostring(E.tag(E.subtag("text"), "tail")) + '<tag><subtag>text</subtag>tail</tag>' + + Here's a somewhat larger example; this shows how to generate HTML + documents, using a mix of prepared factory functions for inline elements, + nested ``E.tag`` calls, and embedded XHTML fragments:: + + # some common inline elements + A = E.a + I = E.i + B = E.b + + def CLASS(v): + # helper function, 'class' is a reserved word + return {'class': v} + + page = ( + E.html( + E.head( + E.title("This is a sample document") + ), + E.body( + E.h1("Hello!", CLASS("title")), + E.p("This is a paragraph with ", B("bold"), " text in it!"), + E.p("This is another paragraph, with a ", + A("link", href="http://www.python.org"), "."), + E.p("Here are some reserved characters: <spam&egg>."), + ET.XML("<p>And finally, here is an embedded XHTML fragment.</p>"), + ) + ) + ) + + print ET.tostring(page) + + Here's a prettyprinted version of the output from the above script:: + + <html> + <head> + <title>This is a sample document</title> + </head> + <body> + <h1 class="title">Hello!</h1> + <p>This is a paragraph with <b>bold</b> text in it!</p> + <p>This is another paragraph, with <a href="http://www.python.org">link</a>.</p> + <p>Here are some reserved characters: <spam&egg>.</p> + <p>And finally, here is an embedded XHTML fragment.</p> + </body> + </html> + + For namespace support, you can pass a namespace map (``nsmap``) + and/or a specific target ``namespace`` to the ElementMaker class:: + + >>> E = ElementMaker(namespace="http://my.ns/") + >>> print(ET.tostring( E.test )) + <test xmlns="http://my.ns/"/> + + >>> E = ElementMaker(namespace="http://my.ns/", nsmap={'p':'http://my.ns/'}) + >>> print(ET.tostring( E.test )) + <p:test xmlns:p="http://my.ns/"/> + """ + + def __init__(self, typemap=None, + namespace=None, nsmap=None, makeelement=None): + self._namespace = '{' + namespace + '}' if namespace is not None else None + self._nsmap = dict(nsmap) if nsmap else None + + assert makeelement is None or callable(makeelement) + self._makeelement = makeelement if makeelement is not None else ET.Element + + # initialize the default type map functions for this element factory + typemap = dict(typemap) if typemap else {} + + def add_text(elem, item): + try: + last_child = elem[-1] + except IndexError: + elem.text = (elem.text or "") + item + else: + last_child.tail = (last_child.tail or "") + item + + def add_cdata(elem, cdata): + if elem.text: + raise ValueError("Can't add a CDATA section. Element already has some text: %r" % elem.text) + elem.text = cdata + + if str not in typemap: + typemap[str] = add_text + if unicode not in typemap: + typemap[unicode] = add_text + if ET.CDATA not in typemap: + typemap[ET.CDATA] = add_cdata + + def add_dict(elem, item): + attrib = elem.attrib + for k, v in item.items(): + if isinstance(v, basestring): + attrib[k] = v + else: + attrib[k] = typemap[type(v)](None, v) + + if dict not in typemap: + typemap[dict] = add_dict + + self._typemap = typemap + + def __call__(self, tag, *children, **attrib): + typemap = self._typemap + + # We'll usually get a 'str', and the compiled type check is very fast. + if not isinstance(tag, str) and isinstance(tag, _QName): + # A QName is explicitly qualified, do not look at self._namespace. + tag = tag.text + elif self._namespace is not None and tag[0] != '{': + tag = self._namespace + tag + elem = self._makeelement(tag, nsmap=self._nsmap) + if attrib: + typemap[dict](elem, attrib) + + for item in children: + if callable(item): + item = item() + t = typemap.get(type(item)) + if t is None: + if ET.iselement(item): + elem.append(item) + continue + for basetype in type(item).__mro__: + # See if the typemap knows of any of this type's bases. + t = typemap.get(basetype) + if t is not None: + break + else: + raise TypeError("bad argument type: %s(%r)" % + (type(item).__name__, item)) + v = t(elem, item) + if v: + typemap.get(type(v))(elem, v) + + return elem + + def __getattr__(self, tag): + return partial(self, tag) + + +# create factory object +E = ElementMaker() diff --git a/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi new file mode 100644 index 00000000..92d1d47a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi @@ -0,0 +1,580 @@ +# Configurable Element class lookup + +################################################################################ +# Custom Element classes + +cdef public class ElementBase(_Element) [ type LxmlElementBaseType, + object LxmlElementBase ]: + """ElementBase(*children, attrib=None, nsmap=None, **_extra) + + The public Element class. All custom Element classes must inherit + from this one. To create an Element, use the `Element()` factory. + + BIG FAT WARNING: Subclasses *must not* override __init__ or + __new__ as it is absolutely undefined when these objects will be + created or destroyed. All persistent state of Elements must be + stored in the underlying XML. If you really need to initialize + the object after creation, you can implement an ``_init(self)`` + method that will be called directly after object creation. + + Subclasses of this class can be instantiated to create a new + Element. By default, the tag name will be the class name and the + namespace will be empty. You can modify this with the following + class attributes: + + * TAG - the tag name, possibly containing a namespace in Clark + notation + + * NAMESPACE - the default namespace URI, unless provided as part + of the TAG attribute. + + * HTML - flag if the class is an HTML tag, as opposed to an XML + tag. This only applies to un-namespaced tags and defaults to + false (i.e. XML). + + * PARSER - the parser that provides the configuration for the + newly created document. Providing an HTML parser here will + default to creating an HTML element. + + In user code, the latter three are commonly inherited in class + hierarchies that implement a common namespace. + """ + def __init__(self, *children, attrib=None, nsmap=None, **_extra): + """ElementBase(*children, attrib=None, nsmap=None, **_extra) + """ + cdef bint is_html = 0 + cdef _BaseParser parser + cdef _Element last_child + # don't use normal attribute access as it might be overridden + _getattr = object.__getattribute__ + try: + namespace = _utf8(_getattr(self, 'NAMESPACE')) + except AttributeError: + namespace = None + try: + ns, tag = _getNsTag(_getattr(self, 'TAG')) + if ns is not None: + namespace = ns + except AttributeError: + tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__')) + if b'.' in tag: + tag = tag.split(b'.')[-1] + try: + parser = _getattr(self, 'PARSER') + except AttributeError: + parser = None + for child in children: + if isinstance(child, _Element): + parser = (<_Element>child)._doc._parser + break + if isinstance(parser, HTMLParser): + is_html = 1 + if namespace is None: + try: + is_html = _getattr(self, 'HTML') + except AttributeError: + pass + _initNewElement(self, is_html, tag, namespace, parser, + attrib, nsmap, _extra) + last_child = None + for child in children: + if _isString(child): + if last_child is None: + _setNodeText(self._c_node, + (_collectText(self._c_node.children) or '') + child) + else: + _setTailText(last_child._c_node, + (_collectText(last_child._c_node.next) or '') + child) + elif isinstance(child, _Element): + last_child = child + _appendChild(self, last_child) + elif isinstance(child, type) and issubclass(child, ElementBase): + last_child = child() + _appendChild(self, last_child) + else: + raise TypeError, f"Invalid child type: {type(child)!r}" + +cdef class CommentBase(_Comment): + """All custom Comment classes must inherit from this one. + + To create an XML Comment instance, use the ``Comment()`` factory. + + Subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or + destroyed. All persistent state of Comments must be stored in the + underlying XML. If you really need to initialize the object after + creation, you can implement an ``_init(self)`` method that will be + called after object creation. + """ + def __init__(self, text): + # copied from Comment() factory + cdef _Document doc + cdef xmlDoc* c_doc + if text is None: + text = b'' + else: + text = _utf8(text) + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + self._c_node = _createComment(c_doc, _xcstr(text)) + if self._c_node is NULL: + raise MemoryError() + tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) + _registerProxy(self, doc, self._c_node) + self._init() + +cdef class PIBase(_ProcessingInstruction): + """All custom Processing Instruction classes must inherit from this one. + + To create an XML ProcessingInstruction instance, use the ``PI()`` + factory. + + Subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or + destroyed. All persistent state of PIs must be stored in the + underlying XML. If you really need to initialize the object after + creation, you can implement an ``_init(self)`` method that will be + called after object creation. + """ + def __init__(self, target, text=None): + # copied from PI() factory + cdef _Document doc + cdef xmlDoc* c_doc + target = _utf8(target) + if text is None: + text = b'' + else: + text = _utf8(text) + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text)) + if self._c_node is NULL: + raise MemoryError() + tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) + _registerProxy(self, doc, self._c_node) + self._init() + +cdef class EntityBase(_Entity): + """All custom Entity classes must inherit from this one. + + To create an XML Entity instance, use the ``Entity()`` factory. + + Subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or + destroyed. All persistent state of Entities must be stored in the + underlying XML. If you really need to initialize the object after + creation, you can implement an ``_init(self)`` method that will be + called after object creation. + """ + def __init__(self, name): + cdef _Document doc + cdef xmlDoc* c_doc + name_utf = _utf8(name) + c_name = _xcstr(name_utf) + if c_name[0] == c'#': + if not _characterReferenceIsValid(c_name + 1): + raise ValueError, f"Invalid character reference: '{name}'" + elif not _xmlNameIsValid(c_name): + raise ValueError, f"Invalid entity reference: '{name}'" + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + self._c_node = _createEntity(c_doc, c_name) + if self._c_node is NULL: + raise MemoryError() + tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) + _registerProxy(self, doc, self._c_node) + self._init() + + +cdef int _validateNodeClass(xmlNode* c_node, cls) except -1: + if c_node.type == tree.XML_ELEMENT_NODE: + expected = ElementBase + elif c_node.type == tree.XML_COMMENT_NODE: + expected = CommentBase + elif c_node.type == tree.XML_ENTITY_REF_NODE: + expected = EntityBase + elif c_node.type == tree.XML_PI_NODE: + expected = PIBase + else: + assert False, f"Unknown node type: {c_node.type}" + + if not (isinstance(cls, type) and issubclass(cls, expected)): + raise TypeError( + f"result of class lookup must be subclass of {type(expected)}, got {type(cls)}") + return 0 + + +################################################################################ +# Element class lookup + +ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) + +# class to store element class lookup functions +cdef public class ElementClassLookup [ type LxmlElementClassLookupType, + object LxmlElementClassLookup ]: + """ElementClassLookup(self) + Superclass of Element class lookups. + """ + cdef _element_class_lookup_function _lookup_function + + +cdef public class FallbackElementClassLookup(ElementClassLookup) \ + [ type LxmlFallbackElementClassLookupType, + object LxmlFallbackElementClassLookup ]: + """FallbackElementClassLookup(self, fallback=None) + + Superclass of Element class lookups with additional fallback. + """ + cdef readonly ElementClassLookup fallback + cdef _element_class_lookup_function _fallback_function + def __cinit__(self): + # fall back to default lookup + self._fallback_function = _lookupDefaultElementClass + + def __init__(self, ElementClassLookup fallback=None): + if fallback is not None: + self._setFallback(fallback) + else: + self._fallback_function = _lookupDefaultElementClass + + cdef void _setFallback(self, ElementClassLookup lookup): + """Sets the fallback scheme for this lookup method. + """ + self.fallback = lookup + self._fallback_function = lookup._lookup_function + if self._fallback_function is NULL: + self._fallback_function = _lookupDefaultElementClass + + def set_fallback(self, ElementClassLookup lookup not None): + """set_fallback(self, lookup) + + Sets the fallback scheme for this lookup method. + """ + self._setFallback(lookup) + +cdef inline object _callLookupFallback(FallbackElementClassLookup lookup, + _Document doc, xmlNode* c_node): + return lookup._fallback_function(lookup.fallback, doc, c_node) + + +################################################################################ +# default lookup scheme + +cdef class ElementDefaultClassLookup(ElementClassLookup): + """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None) + Element class lookup scheme that always returns the default Element + class. + + The keyword arguments ``element``, ``comment``, ``pi`` and ``entity`` + accept the respective Element classes. + """ + cdef readonly object element_class + cdef readonly object comment_class + cdef readonly object pi_class + cdef readonly object entity_class + def __cinit__(self): + self._lookup_function = _lookupDefaultElementClass + + def __init__(self, element=None, comment=None, pi=None, entity=None): + if element is None: + self.element_class = _Element + elif issubclass(element, ElementBase): + self.element_class = element + else: + raise TypeError, "element class must be subclass of ElementBase" + + if comment is None: + self.comment_class = _Comment + elif issubclass(comment, CommentBase): + self.comment_class = comment + else: + raise TypeError, "comment class must be subclass of CommentBase" + + if entity is None: + self.entity_class = _Entity + elif issubclass(entity, EntityBase): + self.entity_class = entity + else: + raise TypeError, "Entity class must be subclass of EntityBase" + + if pi is None: + self.pi_class = None # special case, see below + elif issubclass(pi, PIBase): + self.pi_class = pi + else: + raise TypeError, "PI class must be subclass of PIBase" + +cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node): + "Trivial class lookup function that always returns the default class." + if c_node.type == tree.XML_ELEMENT_NODE: + if state is not None: + return (<ElementDefaultClassLookup>state).element_class + else: + return _Element + elif c_node.type == tree.XML_COMMENT_NODE: + if state is not None: + return (<ElementDefaultClassLookup>state).comment_class + else: + return _Comment + elif c_node.type == tree.XML_ENTITY_REF_NODE: + if state is not None: + return (<ElementDefaultClassLookup>state).entity_class + else: + return _Entity + elif c_node.type == tree.XML_PI_NODE: + if state is None or (<ElementDefaultClassLookup>state).pi_class is None: + # special case XSLT-PI + if c_node.name is not NULL and c_node.content is not NULL: + if tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0: + if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL or \ + tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL: + return _XSLTProcessingInstruction + return _ProcessingInstruction + else: + return (<ElementDefaultClassLookup>state).pi_class + else: + assert False, f"Unknown node type: {c_node.type}" + + +################################################################################ +# attribute based lookup scheme + +cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): + """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None) + Checks an attribute of an Element and looks up the value in a + class dictionary. + + Arguments: + - attribute name - '{ns}name' style string + - class mapping - Python dict mapping attribute values to Element classes + - fallback - optional fallback lookup mechanism + + A None key in the class mapping will be checked if the attribute is + missing. + """ + cdef object _class_mapping + cdef tuple _pytag + cdef const_xmlChar* _c_ns + cdef const_xmlChar* _c_name + def __cinit__(self): + self._lookup_function = _attribute_class_lookup + + def __init__(self, attribute_name, class_mapping, + ElementClassLookup fallback=None): + self._pytag = _getNsTag(attribute_name) + ns, name = self._pytag + if ns is None: + self._c_ns = NULL + else: + self._c_ns = _xcstr(ns) + self._c_name = _xcstr(name) + self._class_mapping = dict(class_mapping) + + FallbackElementClassLookup.__init__(self, fallback) + +cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node): + cdef AttributeBasedElementClassLookup lookup + cdef python.PyObject* dict_result + + lookup = <AttributeBasedElementClassLookup>state + if c_node.type == tree.XML_ELEMENT_NODE: + value = _attributeValueFromNsName( + c_node, lookup._c_ns, lookup._c_name) + dict_result = python.PyDict_GetItem(lookup._class_mapping, value) + if dict_result is not NULL: + cls = <object>dict_result + _validateNodeClass(c_node, cls) + return cls + return _callLookupFallback(lookup, doc, c_node) + + +################################################################################ +# per-parser lookup scheme + +cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): + """ParserBasedElementClassLookup(self, fallback=None) + Element class lookup based on the XML parser. + """ + def __cinit__(self): + self._lookup_function = _parser_class_lookup + +cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): + if doc._parser._class_lookup is not None: + return doc._parser._class_lookup._lookup_function( + doc._parser._class_lookup, doc, c_node) + return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node) + + +################################################################################ +# custom class lookup based on node type, namespace, name + +cdef class CustomElementClassLookup(FallbackElementClassLookup): + """CustomElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. + + You can inherit from this class and override the method:: + + lookup(self, type, doc, namespace, name) + + to lookup the element class for a node. Arguments of the method: + * type: one of 'element', 'comment', 'PI', 'entity' + * doc: document that the node is in + * namespace: namespace URI of the node (or None for comments/PIs/entities) + * name: name of the element/entity, None for comments, target for PIs + + If you return None from this method, the fallback will be called. + """ + def __cinit__(self): + self._lookup_function = _custom_class_lookup + + def lookup(self, type, doc, namespace, name): + "lookup(self, type, doc, namespace, name)" + return None + +cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node): + cdef CustomElementClassLookup lookup + + lookup = <CustomElementClassLookup>state + + if c_node.type == tree.XML_ELEMENT_NODE: + element_type = "element" + elif c_node.type == tree.XML_COMMENT_NODE: + element_type = "comment" + elif c_node.type == tree.XML_PI_NODE: + element_type = "PI" + elif c_node.type == tree.XML_ENTITY_REF_NODE: + element_type = "entity" + else: + element_type = "element" + if c_node.name is NULL: + name = None + else: + name = funicode(c_node.name) + c_str = tree._getNs(c_node) + ns = funicode(c_str) if c_str is not NULL else None + + cls = lookup.lookup(element_type, doc, ns, name) + if cls is not None: + _validateNodeClass(c_node, cls) + return cls + return _callLookupFallback(lookup, doc, c_node) + + +################################################################################ +# read-only tree based class lookup + +cdef class PythonElementClassLookup(FallbackElementClassLookup): + """PythonElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. + + This class lookup scheme allows access to the entire XML tree in + read-only mode. To use it, re-implement the ``lookup(self, doc, + root)`` method in a subclass:: + + from lxml import etree, pyclasslookup + + class MyElementClass(etree.ElementBase): + honkey = True + + class MyLookup(pyclasslookup.PythonElementClassLookup): + def lookup(self, doc, root): + if root.tag == "sometag": + return MyElementClass + else: + for child in root: + if child.tag == "someothertag": + return MyElementClass + # delegate to default + return None + + If you return None from this method, the fallback will be called. + + The first argument is the opaque document instance that contains + the Element. The second argument is a lightweight Element proxy + implementation that is only valid during the lookup. Do not try + to keep a reference to it. Once the lookup is done, the proxy + will be invalid. + + Also, you cannot wrap such a read-only Element in an ElementTree, + and you must take care not to keep a reference to them outside of + the `lookup()` method. + + Note that the API of the Element objects is not complete. It is + purely read-only and does not support all features of the normal + `lxml.etree` API (such as XPath, extended slicing or some + iteration methods). + + See https://lxml.de/element_classes.html + """ + def __cinit__(self): + self._lookup_function = _python_class_lookup + + def lookup(self, doc, element): + """lookup(self, doc, element) + + Override this method to implement your own lookup scheme. + """ + return None + +cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node): + cdef PythonElementClassLookup lookup + cdef _ReadOnlyProxy proxy + lookup = <PythonElementClassLookup>state + + proxy = _newReadOnlyProxy(None, c_node) + cls = lookup.lookup(doc, proxy) + _freeReadOnlyProxies(proxy) + + if cls is not None: + _validateNodeClass(c_node, cls) + return cls + return _callLookupFallback(lookup, doc, c_node) + +################################################################################ +# Global setup + +cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS +cdef object ELEMENT_CLASS_LOOKUP_STATE + +cdef void _setElementClassLookupFunction( + _element_class_lookup_function function, object state): + global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE + if function is NULL: + state = DEFAULT_ELEMENT_CLASS_LOOKUP + function = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function + + ELEMENT_CLASS_LOOKUP_STATE = state + LOOKUP_ELEMENT_CLASS = function + +def set_element_class_lookup(ElementClassLookup lookup = None): + """set_element_class_lookup(lookup = None) + + Set the global element class lookup method. + + This defines the main entry point for looking up element implementations. + The standard implementation uses the :class:`ParserBasedElementClassLookup` + to delegate to different lookup schemes for each parser. + + .. warning:: + + This should only be changed by applications, not by library packages. + In most cases, parser specific lookups should be preferred, + which can be configured via + :meth:`~lxml.etree.XMLParser.set_element_class_lookup` + (and the same for HTML parsers). + + Globally replacing the element class lookup by something other than a + :class:`ParserBasedElementClassLookup` will prevent parser specific lookup + schemes from working. Several tools rely on parser specific lookups, + including :mod:`lxml.html` and :mod:`lxml.objectify`. + """ + if lookup is None or lookup._lookup_function is NULL: + _setElementClassLookupFunction(NULL, None) + else: + _setElementClassLookupFunction(lookup._lookup_function, lookup) + +# default setup: parser delegation +cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP +DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup() + +set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP) diff --git a/.venv/lib/python3.12/site-packages/lxml/cleanup.pxi b/.venv/lib/python3.12/site-packages/lxml/cleanup.pxi new file mode 100644 index 00000000..8e266b33 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/cleanup.pxi @@ -0,0 +1,215 @@ +# functions for tree cleanup and removing elements from subtrees + +def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None): + """cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None) + + Remove all namespace declarations from a subtree that are not used + by any of the elements or attributes in that tree. + + If a 'top_nsmap' is provided, it must be a mapping from prefixes + to namespace URIs. These namespaces will be declared on the top + element of the subtree before running the cleanup, which allows + moving namespace declarations to the top of the tree. + + If a 'keep_ns_prefixes' is provided, it must be a list of prefixes. + These prefixes will not be removed as part of the cleanup. + """ + element = _rootNodeOrRaise(tree_or_element) + c_element = element._c_node + + if top_nsmap: + doc = element._doc + # declare namespaces from nsmap, then apply them to the subtree + _setNodeNamespaces(c_element, doc, None, top_nsmap) + moveNodeToDocument(doc, c_element.doc, c_element) + + keep_ns_prefixes = ( + set([_utf8(prefix) for prefix in keep_ns_prefixes]) + if keep_ns_prefixes else None) + + _removeUnusedNamespaceDeclarations(c_element, keep_ns_prefixes) + + +def strip_attributes(tree_or_element, *attribute_names): + """strip_attributes(tree_or_element, *attribute_names) + + Delete all attributes with the provided attribute names from an + Element (or ElementTree) and its descendants. + + Attribute names can contain wildcards as in `_Element.iter`. + + Example usage:: + + strip_attributes(root_element, + 'simpleattr', + '{http://some/ns}attrname', + '{http://other/ns}*') + """ + cdef _MultiTagMatcher matcher + element = _rootNodeOrRaise(tree_or_element) + if not attribute_names: + return + + matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, attribute_names) + matcher.cacheTags(element._doc) + if matcher.rejectsAllAttributes(): + return + _strip_attributes(element._c_node, matcher) + + +cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher): + cdef xmlAttr* c_attr + cdef xmlAttr* c_next_attr + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + c_attr = c_node.properties + while c_attr is not NULL: + c_next_attr = c_attr.next + if matcher.matchesAttribute(c_attr): + tree.xmlRemoveProp(c_attr) + c_attr = c_next_attr + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + + +def strip_elements(tree_or_element, *tag_names, bint with_tail=True): + """strip_elements(tree_or_element, *tag_names, with_tail=True) + + Delete all elements with the provided tag names from a tree or + subtree. This will remove the elements and their entire subtree, + including all their attributes, text content and descendants. It + will also remove the tail text of the element unless you + explicitly set the ``with_tail`` keyword argument option to False. + + Tag names can contain wildcards as in `_Element.iter`. + + Note that this will not delete the element (or ElementTree root + element) that you passed even if it matches. It will only treat + its descendants. If you want to include the root element, check + its tag name directly before even calling this function. + + Example usage:: + + strip_elements(some_element, + 'simpletagname', # non-namespaced tag + '{http://some/ns}tagname', # namespaced tag + '{http://some/other/ns}*' # any tag from a namespace + lxml.etree.Comment # comments + ) + """ + cdef _MultiTagMatcher matcher + doc = _documentOrRaise(tree_or_element) + element = _rootNodeOrRaise(tree_or_element) + if not tag_names: + return + + matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names) + matcher.cacheTags(doc) + if matcher.rejectsAll(): + return + + if isinstance(tree_or_element, _ElementTree): + # include PIs and comments next to the root node + if matcher.matchesType(tree.XML_COMMENT_NODE): + _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail) + if matcher.matchesType(tree.XML_PI_NODE): + _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail) + _strip_elements(doc, element._c_node, matcher, with_tail) + +cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher, + bint with_tail): + cdef xmlNode* c_child + cdef xmlNode* c_next + + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + # we run through the children here to prevent any problems + # with the tree iteration which would occur if we unlinked the + # c_node itself + c_child = _findChildForwards(c_node, 0) + while c_child is not NULL: + c_next = _nextElement(c_child) + if matcher.matches(c_child): + if c_child.type == tree.XML_ELEMENT_NODE: + if not with_tail: + tree.xmlUnlinkNode(c_child) + _removeNode(doc, c_child) + else: + if with_tail: + _removeText(c_child.next) + tree.xmlUnlinkNode(c_child) + attemptDeallocation(c_child) + c_child = c_next + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + + +def strip_tags(tree_or_element, *tag_names): + """strip_tags(tree_or_element, *tag_names) + + Delete all elements with the provided tag names from a tree or + subtree. This will remove the elements and their attributes, but + *not* their text/tail content or descendants. Instead, it will + merge the text content and children of the element into its + parent. + + Tag names can contain wildcards as in `_Element.iter`. + + Note that this will not delete the element (or ElementTree root + element) that you passed even if it matches. It will only treat + its descendants. + + Example usage:: + + strip_tags(some_element, + 'simpletagname', # non-namespaced tag + '{http://some/ns}tagname', # namespaced tag + '{http://some/other/ns}*' # any tag from a namespace + Comment # comments (including their text!) + ) + """ + cdef _MultiTagMatcher matcher + doc = _documentOrRaise(tree_or_element) + element = _rootNodeOrRaise(tree_or_element) + if not tag_names: + return + + matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names) + matcher.cacheTags(doc) + if matcher.rejectsAll(): + return + + if isinstance(tree_or_element, _ElementTree): + # include PIs and comments next to the root node + if matcher.matchesType(tree.XML_COMMENT_NODE): + _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0) + if matcher.matchesType(tree.XML_PI_NODE): + _removeSiblings(element._c_node, tree.XML_PI_NODE, 0) + _strip_tags(doc, element._c_node, matcher) + +cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher): + cdef xmlNode* c_child + cdef xmlNode* c_next + + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + # we run through the children here to prevent any problems + # with the tree iteration which would occur if we unlinked the + # c_node itself + c_child = _findChildForwards(c_node, 0) + while c_child is not NULL: + if not matcher.matches(c_child): + c_child = _nextElement(c_child) + continue + if c_child.type == tree.XML_ELEMENT_NODE: + c_next = _findChildForwards(c_child, 0) or _nextElement(c_child) + _replaceNodeByChildren(doc, c_child) + if not attemptDeallocation(c_child): + if c_child.nsDef is not NULL: + # make namespaces absolute + moveNodeToDocument(doc, doc._c_doc, c_child) + c_child = c_next + else: + c_next = _nextElement(c_child) + tree.xmlUnlinkNode(c_child) + attemptDeallocation(c_child) + c_child = c_next + tree.END_FOR_EACH_ELEMENT_FROM(c_node) diff --git a/.venv/lib/python3.12/site-packages/lxml/cssselect.py b/.venv/lib/python3.12/site-packages/lxml/cssselect.py new file mode 100644 index 00000000..54cd75ac --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/cssselect.py @@ -0,0 +1,101 @@ +"""CSS Selectors based on XPath. + +This module supports selecting XML/HTML tags based on CSS selectors. +See the `CSSSelector` class for details. + +This is a thin wrapper around cssselect 0.7 or later. +""" + + +from . import etree +try: + import cssselect as external_cssselect +except ImportError: + raise ImportError( + 'cssselect does not seem to be installed. ' + 'See https://pypi.org/project/cssselect/') + + +SelectorSyntaxError = external_cssselect.SelectorSyntaxError +ExpressionError = external_cssselect.ExpressionError +SelectorError = external_cssselect.SelectorError + + +__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError', + 'CSSSelector'] + + +class LxmlTranslator(external_cssselect.GenericTranslator): + """ + A custom CSS selector to XPath translator with lxml-specific extensions. + """ + def xpath_contains_function(self, xpath, function): + # Defined there, removed in later drafts: + # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors + if function.argument_types() not in (['STRING'], ['IDENT']): + raise ExpressionError( + "Expected a single string or ident for :contains(), got %r" + % function.arguments) + value = function.arguments[0].value + return xpath.add_condition( + 'contains(__lxml_internal_css:lower-case(string(.)), %s)' + % self.xpath_literal(value.lower())) + + +class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator): + """ + lxml extensions + HTML support. + """ + + +def _make_lower_case(context, s): + return s.lower() + +ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/') +ns.prefix = '__lxml_internal_css' +ns['lower-case'] = _make_lower_case + + +class CSSSelector(etree.XPath): + """A CSS selector. + + Usage:: + + >>> from lxml import etree, cssselect + >>> select = cssselect.CSSSelector("a tag > child") + + >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>") + >>> [ el.tag for el in select(root) ] + ['child'] + + To use CSS namespaces, you need to pass a prefix-to-namespace + mapping as ``namespaces`` keyword argument:: + + >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#' + >>> select_ns = cssselect.CSSSelector('root > rdf|Description', + ... namespaces={'rdf': rdfns}) + + >>> rdf = etree.XML(( + ... '<root xmlns:rdf="%s">' + ... '<rdf:Description>blah</rdf:Description>' + ... '</root>') % rdfns) + >>> [(el.tag, el.text) for el in select_ns(rdf)] + [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')] + + """ + def __init__(self, css, namespaces=None, translator='xml'): + if translator == 'xml': + translator = LxmlTranslator() + elif translator == 'html': + translator = LxmlHTMLTranslator() + elif translator == 'xhtml': + translator = LxmlHTMLTranslator(xhtml=True) + path = translator.css_to_xpath(css) + super().__init__(path, namespaces=namespaces) + self.css = css + + def __repr__(self): + return '<%s %x for %r>' % ( + self.__class__.__name__, + abs(id(self)), + self.css) diff --git a/.venv/lib/python3.12/site-packages/lxml/debug.pxi b/.venv/lib/python3.12/site-packages/lxml/debug.pxi new file mode 100644 index 00000000..e5bb0619 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/debug.pxi @@ -0,0 +1,90 @@ +@cython.final +@cython.internal +cdef class _MemDebug: + """Debugging support for the memory allocation in libxml2. + """ + def bytes_used(self): + """bytes_used(self) + + Returns the total amount of memory (in bytes) currently used by libxml2. + Note that libxml2 constrains this value to a C int, which limits + the accuracy on 64 bit systems. + """ + return tree.xmlMemUsed() + + def blocks_used(self): + """blocks_used(self) + + Returns the total number of memory blocks currently allocated by libxml2. + Note that libxml2 constrains this value to a C int, which limits + the accuracy on 64 bit systems. + """ + return tree.xmlMemBlocks() + + def dict_size(self): + """dict_size(self) + + Returns the current size of the global name dictionary used by libxml2 + for the current thread. Each thread has its own dictionary. + """ + c_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL) + if c_dict is NULL: + raise MemoryError() + return tree.xmlDictSize(c_dict) + + def dump(self, output_file=None, byte_count=None): + """dump(self, output_file=None, byte_count=None) + + Dumps the current memory blocks allocated by libxml2 to a file. + + The optional parameter 'output_file' specifies the file path. It defaults + to the file ".memorylist" in the current directory. + + The optional parameter 'byte_count' limits the number of bytes in the dump. + Note that this parameter is ignored when lxml is compiled against a libxml2 + version before 2.7.0. + """ + cdef Py_ssize_t c_count + if output_file is None: + output_file = b'.memorylist' + elif isinstance(output_file, unicode): + output_file.encode(sys.getfilesystemencoding()) + + f = stdio.fopen(output_file, "w") + if f is NULL: + raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}") + try: + if byte_count is None: + tree.xmlMemDisplay(f) + else: + c_count = byte_count + tree.xmlMemDisplayLast(f, c_count) + finally: + stdio.fclose(f) + + def show(self, output_file=None, block_count=None): + """show(self, output_file=None, block_count=None) + + Dumps the current memory blocks allocated by libxml2 to a file. + The output file format is suitable for line diffing. + + The optional parameter 'output_file' specifies the file path. It defaults + to the file ".memorydump" in the current directory. + + The optional parameter 'block_count' limits the number of blocks + in the dump. + """ + if output_file is None: + output_file = b'.memorydump' + elif isinstance(output_file, unicode): + output_file.encode(sys.getfilesystemencoding()) + + f = stdio.fopen(output_file, "w") + if f is NULL: + raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}") + try: + tree.xmlMemShow(f, block_count if block_count is not None else tree.xmlMemBlocks()) + finally: + stdio.fclose(f) + +memory_debugger = _MemDebug() diff --git a/.venv/lib/python3.12/site-packages/lxml/docloader.pxi b/.venv/lib/python3.12/site-packages/lxml/docloader.pxi new file mode 100644 index 00000000..7b38f438 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/docloader.pxi @@ -0,0 +1,178 @@ +# Custom resolver API + +ctypedef enum _InputDocumentDataType: + PARSER_DATA_INVALID + PARSER_DATA_EMPTY + PARSER_DATA_STRING + PARSER_DATA_FILENAME + PARSER_DATA_FILE + +@cython.final +@cython.internal +cdef class _InputDocument: + cdef _InputDocumentDataType _type + cdef bytes _data_bytes + cdef object _filename + cdef object _file + cdef bint _close_file + + def __cinit__(self): + self._type = PARSER_DATA_INVALID + + +cdef class Resolver: + "This is the base class of all resolvers." + def resolve(self, system_url, public_id, context): + """resolve(self, system_url, public_id, context) + + Override this method to resolve an external source by + ``system_url`` and ``public_id``. The third argument is an + opaque context object. + + Return the result of one of the ``resolve_*()`` methods. + """ + return None + + def resolve_empty(self, context): + """resolve_empty(self, context) + + Return an empty input document. + + Pass context as parameter. + """ + cdef _InputDocument doc_ref + doc_ref = _InputDocument() + doc_ref._type = PARSER_DATA_EMPTY + return doc_ref + + def resolve_string(self, string, context, *, base_url=None): + """resolve_string(self, string, context, base_url=None) + + Return a parsable string as input document. + + Pass data string and context as parameters. You can pass the + source URL or filename through the ``base_url`` keyword + argument. + """ + cdef _InputDocument doc_ref + if isinstance(string, unicode): + string = (<unicode>string).encode('utf8') + elif not isinstance(string, bytes): + raise TypeError, "argument must be a byte string or unicode string" + doc_ref = _InputDocument() + doc_ref._type = PARSER_DATA_STRING + doc_ref._data_bytes = string + if base_url is not None: + doc_ref._filename = _encodeFilename(base_url) + return doc_ref + + def resolve_filename(self, filename, context): + """resolve_filename(self, filename, context) + + Return the name of a parsable file as input document. + + Pass filename and context as parameters. You can also pass a + URL with an HTTP, FTP or file target. + """ + cdef _InputDocument doc_ref + doc_ref = _InputDocument() + doc_ref._type = PARSER_DATA_FILENAME + doc_ref._filename = _encodeFilename(filename) + return doc_ref + + def resolve_file(self, f, context, *, base_url=None, bint close=True): + """resolve_file(self, f, context, base_url=None, close=True) + + Return an open file-like object as input document. + + Pass open file and context as parameters. You can pass the + base URL or filename of the file through the ``base_url`` + keyword argument. If the ``close`` flag is True (the + default), the file will be closed after reading. + + Note that using ``.resolve_filename()`` is more efficient, + especially in threaded environments. + """ + cdef _InputDocument doc_ref + try: + f.read + except AttributeError: + raise TypeError, "Argument is not a file-like object" + doc_ref = _InputDocument() + doc_ref._type = PARSER_DATA_FILE + if base_url is not None: + doc_ref._filename = _encodeFilename(base_url) + else: + doc_ref._filename = _getFilenameForFile(f) + doc_ref._close_file = close + doc_ref._file = f + return doc_ref + +@cython.final +@cython.internal +cdef class _ResolverRegistry: + cdef object _resolvers + cdef Resolver _default_resolver + def __cinit__(self, Resolver default_resolver=None): + self._resolvers = set() + self._default_resolver = default_resolver + + def add(self, Resolver resolver not None): + """add(self, resolver) + + Register a resolver. + + For each requested entity, the 'resolve' method of the resolver will + be called and the result will be passed to the parser. If this method + returns None, the request will be delegated to other resolvers or the + default resolver. The resolvers will be tested in an arbitrary order + until the first match is found. + """ + self._resolvers.add(resolver) + + def remove(self, resolver): + "remove(self, resolver)" + self._resolvers.discard(resolver) + + cdef _ResolverRegistry _copy(self): + cdef _ResolverRegistry registry + registry = _ResolverRegistry(self._default_resolver) + registry._resolvers = self._resolvers.copy() + return registry + + def copy(self): + "copy(self)" + return self._copy() + + def resolve(self, system_url, public_id, context): + "resolve(self, system_url, public_id, context)" + for resolver in self._resolvers: + result = resolver.resolve(system_url, public_id, context) + if result is not None: + return result + if self._default_resolver is None: + return None + return self._default_resolver.resolve(system_url, public_id, context) + + def __repr__(self): + return repr(self._resolvers) + + +@cython.internal +cdef class _ResolverContext(_ExceptionContext): + cdef _ResolverRegistry _resolvers + cdef _TempStore _storage + + cdef int clear(self) except -1: + _ExceptionContext.clear(self) + self._storage.clear() + return 0 + + +cdef _initResolverContext(_ResolverContext context, + _ResolverRegistry resolvers): + if resolvers is None: + context._resolvers = _ResolverRegistry() + else: + context._resolvers = resolvers + context._storage = _TempStore() diff --git a/.venv/lib/python3.12/site-packages/lxml/doctestcompare.py b/.venv/lib/python3.12/site-packages/lxml/doctestcompare.py new file mode 100644 index 00000000..8099771d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/doctestcompare.py @@ -0,0 +1,488 @@ +""" +lxml-based doctest output comparison. + +Note: normally, you should just import the `lxml.usedoctest` and +`lxml.html.usedoctest` modules from within a doctest, instead of this +one:: + + >>> import lxml.usedoctest # for XML output + + >>> import lxml.html.usedoctest # for HTML output + +To use this module directly, you must call ``lxmldoctest.install()``, +which will cause doctest to use this in all subsequent calls. + +This changes the way output is checked and comparisons are made for +XML or HTML-like content. + +XML or HTML content is noticed because the example starts with ``<`` +(it's HTML if it starts with ``<html``). You can also use the +``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing. + +Some rough wildcard-like things are allowed. Whitespace is generally +ignored (except in attributes). In text (attributes and text in the +body) you can use ``...`` as a wildcard. In an example it also +matches any trailing tags in the element, though it does not match +leading tags. You may create a tag ``<any>`` or include an ``any`` +attribute in the tag. An ``any`` tag matches any tag, while the +attribute matches any and all attributes. + +When a match fails, the reformatted example and gotten text is +displayed (indented), and a rough diff-like output is given. Anything +marked with ``+`` is in the output but wasn't supposed to be, and +similarly ``-`` means its in the example but wasn't in the output. + +You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP`` +""" + +from lxml import etree +import sys +import re +import doctest +try: + from html import escape as html_escape +except ImportError: + from cgi import escape as html_escape + +__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker', + 'LHTMLOutputChecker', 'install', 'temp_install'] + +PARSE_HTML = doctest.register_optionflag('PARSE_HTML') +PARSE_XML = doctest.register_optionflag('PARSE_XML') +NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP') + +OutputChecker = doctest.OutputChecker + +def strip(v): + if v is None: + return None + else: + return v.strip() + +def norm_whitespace(v): + return _norm_whitespace_re.sub(' ', v) + +_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True) + +def html_fromstring(html): + return etree.fromstring(html, _html_parser) + +# We use this to distinguish repr()s from elements: +_repr_re = re.compile(r'^<[^>]+ (at|object) ') +_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+') + +class LXMLOutputChecker(OutputChecker): + + empty_tags = ( + 'param', 'img', 'area', 'br', 'basefont', 'input', + 'base', 'meta', 'link', 'col') + + def get_default_parser(self): + return etree.XML + + def check_output(self, want, got, optionflags): + alt_self = getattr(self, '_temp_override_self', None) + if alt_self is not None: + super_method = self._temp_call_super_check_output + self = alt_self + else: + super_method = OutputChecker.check_output + parser = self.get_parser(want, got, optionflags) + if not parser: + return super_method( + self, want, got, optionflags) + try: + want_doc = parser(want) + except etree.XMLSyntaxError: + return False + try: + got_doc = parser(got) + except etree.XMLSyntaxError: + return False + return self.compare_docs(want_doc, got_doc) + + def get_parser(self, want, got, optionflags): + parser = None + if NOPARSE_MARKUP & optionflags: + return None + if PARSE_HTML & optionflags: + parser = html_fromstring + elif PARSE_XML & optionflags: + parser = etree.XML + elif (want.strip().lower().startswith('<html') + and got.strip().startswith('<html')): + parser = html_fromstring + elif (self._looks_like_markup(want) + and self._looks_like_markup(got)): + parser = self.get_default_parser() + return parser + + def _looks_like_markup(self, s): + s = s.strip() + return (s.startswith('<') + and not _repr_re.search(s)) + + def compare_docs(self, want, got): + if not self.tag_compare(want.tag, got.tag): + return False + if not self.text_compare(want.text, got.text, True): + return False + if not self.text_compare(want.tail, got.tail, True): + return False + if 'any' not in want.attrib: + want_keys = sorted(want.attrib.keys()) + got_keys = sorted(got.attrib.keys()) + if want_keys != got_keys: + return False + for key in want_keys: + if not self.text_compare(want.attrib[key], got.attrib[key], False): + return False + if want.text != '...' or len(want): + want_children = list(want) + got_children = list(got) + while want_children or got_children: + if not want_children or not got_children: + return False + want_first = want_children.pop(0) + got_first = got_children.pop(0) + if not self.compare_docs(want_first, got_first): + return False + if not got_children and want_first.tail == '...': + break + return True + + def text_compare(self, want, got, strip): + want = want or '' + got = got or '' + if strip: + want = norm_whitespace(want).strip() + got = norm_whitespace(got).strip() + want = '^%s$' % re.escape(want) + want = want.replace(r'\.\.\.', '.*') + if re.search(want, got): + return True + else: + return False + + def tag_compare(self, want, got): + if want == 'any': + return True + if (not isinstance(want, (str, bytes)) + or not isinstance(got, (str, bytes))): + return want == got + want = want or '' + got = got or '' + if want.startswith('{...}'): + # Ellipsis on the namespace + return want.split('}')[-1] == got.split('}')[-1] + else: + return want == got + + def output_difference(self, example, got, optionflags): + want = example.want + parser = self.get_parser(want, got, optionflags) + errors = [] + if parser is not None: + try: + want_doc = parser(want) + except etree.XMLSyntaxError: + e = sys.exc_info()[1] + errors.append('In example: %s' % e) + try: + got_doc = parser(got) + except etree.XMLSyntaxError: + e = sys.exc_info()[1] + errors.append('In actual output: %s' % e) + if parser is None or errors: + value = OutputChecker.output_difference( + self, example, got, optionflags) + if errors: + errors.append(value) + return '\n'.join(errors) + else: + return value + html = parser is html_fromstring + diff_parts = ['Expected:', + self.format_doc(want_doc, html, 2), + 'Got:', + self.format_doc(got_doc, html, 2), + 'Diff:', + self.collect_diff(want_doc, got_doc, html, 2)] + return '\n'.join(diff_parts) + + def html_empty_tag(self, el, html=True): + if not html: + return False + if el.tag not in self.empty_tags: + return False + if el.text or len(el): + # This shouldn't happen (contents in an empty tag) + return False + return True + + def format_doc(self, doc, html, indent, prefix=''): + parts = [] + if not len(doc): + # No children... + parts.append(' '*indent) + parts.append(prefix) + parts.append(self.format_tag(doc)) + if not self.html_empty_tag(doc, html): + if strip(doc.text): + parts.append(self.format_text(doc.text)) + parts.append(self.format_end_tag(doc)) + if strip(doc.tail): + parts.append(self.format_text(doc.tail)) + parts.append('\n') + return ''.join(parts) + parts.append(' '*indent) + parts.append(prefix) + parts.append(self.format_tag(doc)) + if not self.html_empty_tag(doc, html): + parts.append('\n') + if strip(doc.text): + parts.append(' '*indent) + parts.append(self.format_text(doc.text)) + parts.append('\n') + for el in doc: + parts.append(self.format_doc(el, html, indent+2)) + parts.append(' '*indent) + parts.append(self.format_end_tag(doc)) + parts.append('\n') + if strip(doc.tail): + parts.append(' '*indent) + parts.append(self.format_text(doc.tail)) + parts.append('\n') + return ''.join(parts) + + def format_text(self, text, strip=True): + if text is None: + return '' + if strip: + text = text.strip() + return html_escape(text, 1) + + def format_tag(self, el): + attrs = [] + if isinstance(el, etree.CommentBase): + # FIXME: probably PIs should be handled specially too? + return '<!--' + for name, value in sorted(el.attrib.items()): + attrs.append('%s="%s"' % (name, self.format_text(value, False))) + if not attrs: + return '<%s>' % el.tag + return '<%s %s>' % (el.tag, ' '.join(attrs)) + + def format_end_tag(self, el): + if isinstance(el, etree.CommentBase): + # FIXME: probably PIs should be handled specially too? + return '-->' + return '</%s>' % el.tag + + def collect_diff(self, want, got, html, indent): + parts = [] + if not len(want) and not len(got): + parts.append(' '*indent) + parts.append(self.collect_diff_tag(want, got)) + if not self.html_empty_tag(got, html): + parts.append(self.collect_diff_text(want.text, got.text)) + parts.append(self.collect_diff_end_tag(want, got)) + parts.append(self.collect_diff_text(want.tail, got.tail)) + parts.append('\n') + return ''.join(parts) + parts.append(' '*indent) + parts.append(self.collect_diff_tag(want, got)) + parts.append('\n') + if strip(want.text) or strip(got.text): + parts.append(' '*indent) + parts.append(self.collect_diff_text(want.text, got.text)) + parts.append('\n') + want_children = list(want) + got_children = list(got) + while want_children or got_children: + if not want_children: + parts.append(self.format_doc(got_children.pop(0), html, indent+2, '+')) + continue + if not got_children: + parts.append(self.format_doc(want_children.pop(0), html, indent+2, '-')) + continue + parts.append(self.collect_diff( + want_children.pop(0), got_children.pop(0), html, indent+2)) + parts.append(' '*indent) + parts.append(self.collect_diff_end_tag(want, got)) + parts.append('\n') + if strip(want.tail) or strip(got.tail): + parts.append(' '*indent) + parts.append(self.collect_diff_text(want.tail, got.tail)) + parts.append('\n') + return ''.join(parts) + + def collect_diff_tag(self, want, got): + if not self.tag_compare(want.tag, got.tag): + tag = '%s (got: %s)' % (want.tag, got.tag) + else: + tag = got.tag + attrs = [] + any = want.tag == 'any' or 'any' in want.attrib + for name, value in sorted(got.attrib.items()): + if name not in want.attrib and not any: + attrs.append('+%s="%s"' % (name, self.format_text(value, False))) + else: + if name in want.attrib: + text = self.collect_diff_text(want.attrib[name], value, False) + else: + text = self.format_text(value, False) + attrs.append('%s="%s"' % (name, text)) + if not any: + for name, value in sorted(want.attrib.items()): + if name in got.attrib: + continue + attrs.append('-%s="%s"' % (name, self.format_text(value, False))) + if attrs: + tag = '<%s %s>' % (tag, ' '.join(attrs)) + else: + tag = '<%s>' % tag + return tag + + def collect_diff_end_tag(self, want, got): + if want.tag != got.tag: + tag = '%s (got: %s)' % (want.tag, got.tag) + else: + tag = got.tag + return '</%s>' % tag + + def collect_diff_text(self, want, got, strip=True): + if self.text_compare(want, got, strip): + if not got: + return '' + return self.format_text(got, strip) + text = '%s (got: %s)' % (want, got) + return self.format_text(text, strip) + +class LHTMLOutputChecker(LXMLOutputChecker): + def get_default_parser(self): + return html_fromstring + +def install(html=False): + """ + Install doctestcompare for all future doctests. + + If html is true, then by default the HTML parser will be used; + otherwise the XML parser is used. + """ + if html: + doctest.OutputChecker = LHTMLOutputChecker + else: + doctest.OutputChecker = LXMLOutputChecker + +def temp_install(html=False, del_module=None): + """ + Use this *inside* a doctest to enable this checker for this + doctest only. + + If html is true, then by default the HTML parser will be used; + otherwise the XML parser is used. + """ + if html: + Checker = LHTMLOutputChecker + else: + Checker = LXMLOutputChecker + frame = _find_doctest_frame() + dt_self = frame.f_locals['self'] + checker = Checker() + old_checker = dt_self._checker + dt_self._checker = checker + # The unfortunate thing is that there is a local variable 'check' + # in the function that runs the doctests, that is a bound method + # into the output checker. We have to update that. We can't + # modify the frame, so we have to modify the object in place. The + # only way to do this is to actually change the func_code + # attribute of the method. We change it, and then wait for + # __record_outcome to be run, which signals the end of the __run + # method, at which point we restore the previous check_output + # implementation. + check_func = frame.f_locals['check'].__func__ + checker_check_func = checker.check_output.__func__ + # Because we can't patch up func_globals, this is the only global + # in check_output that we care about: + doctest.etree = etree + _RestoreChecker(dt_self, old_checker, checker, + check_func, checker_check_func, + del_module) + +class _RestoreChecker: + def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func, + del_module): + self.dt_self = dt_self + self.checker = old_checker + self.checker._temp_call_super_check_output = self.call_super + self.checker._temp_override_self = new_checker + self.check_func = check_func + self.clone_func = clone_func + self.del_module = del_module + self.install_clone() + self.install_dt_self() + def install_clone(self): + self.func_code = self.check_func.__code__ + self.func_globals = self.check_func.__globals__ + self.check_func.__code__ = self.clone_func.__code__ + def uninstall_clone(self): + self.check_func.__code__ = self.func_code + def install_dt_self(self): + self.prev_func = self.dt_self._DocTestRunner__record_outcome + self.dt_self._DocTestRunner__record_outcome = self + def uninstall_dt_self(self): + self.dt_self._DocTestRunner__record_outcome = self.prev_func + def uninstall_module(self): + if self.del_module: + import sys + del sys.modules[self.del_module] + if '.' in self.del_module: + package, module = self.del_module.rsplit('.', 1) + package_mod = sys.modules[package] + delattr(package_mod, module) + def __call__(self, *args, **kw): + self.uninstall_clone() + self.uninstall_dt_self() + del self.checker._temp_override_self + del self.checker._temp_call_super_check_output + result = self.prev_func(*args, **kw) + self.uninstall_module() + return result + def call_super(self, *args, **kw): + self.uninstall_clone() + try: + return self.check_func(*args, **kw) + finally: + self.install_clone() + +def _find_doctest_frame(): + import sys + frame = sys._getframe(1) + while frame: + l = frame.f_locals + if 'BOOM' in l: + # Sign of doctest + return frame + frame = frame.f_back + raise LookupError( + "Could not find doctest (only use this function *inside* a doctest)") + +__test__ = { + 'basic': ''' + >>> temp_install() + >>> print """<xml a="1" b="2">stuff</xml>""" + <xml b="2" a="1">...</xml> + >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>""" + <xml xmlns="..."> + <tag attr="..." /> + </xml> + >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS + <xml>...foo /></xml> + '''} + +if __name__ == '__main__': + import doctest + doctest.testmod() + + diff --git a/.venv/lib/python3.12/site-packages/lxml/dtd.pxi b/.venv/lib/python3.12/site-packages/lxml/dtd.pxi new file mode 100644 index 00000000..ee1b3d47 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/dtd.pxi @@ -0,0 +1,479 @@ +# support for DTD validation +from lxml.includes cimport dtdvalid + +cdef class DTDError(LxmlError): + """Base class for DTD errors. + """ + +cdef class DTDParseError(DTDError): + """Error while parsing a DTD. + """ + +cdef class DTDValidateError(DTDError): + """Error while validating an XML document with a DTD. + """ + + +cdef inline int _assertValidDTDNode(node, void *c_node) except -1: + assert c_node is not NULL, "invalid DTD proxy at %s" % id(node) + + +@cython.final +@cython.internal +@cython.freelist(8) +cdef class _DTDElementContentDecl: + cdef DTD _dtd + cdef tree.xmlElementContent* _c_node + + def __repr__(self): + return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.type, self.occur, id(self)) + + @property + def name(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.name) + + @property + def type(self): + _assertValidDTDNode(self, self._c_node) + cdef int type = self._c_node.type + if type == tree.XML_ELEMENT_CONTENT_PCDATA: + return "pcdata" + elif type == tree.XML_ELEMENT_CONTENT_ELEMENT: + return "element" + elif type == tree.XML_ELEMENT_CONTENT_SEQ: + return "seq" + elif type == tree.XML_ELEMENT_CONTENT_OR: + return "or" + else: + return None + + @property + def occur(self): + _assertValidDTDNode(self, self._c_node) + cdef int occur = self._c_node.ocur + if occur == tree.XML_ELEMENT_CONTENT_ONCE: + return "once" + elif occur == tree.XML_ELEMENT_CONTENT_OPT: + return "opt" + elif occur == tree.XML_ELEMENT_CONTENT_MULT: + return "mult" + elif occur == tree.XML_ELEMENT_CONTENT_PLUS: + return "plus" + else: + return None + + @property + def left(self): + _assertValidDTDNode(self, self._c_node) + c1 = self._c_node.c1 + if c1: + node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl) + node._dtd = self._dtd + node._c_node = <tree.xmlElementContent*>c1 + return node + else: + return None + + @property + def right(self): + _assertValidDTDNode(self, self._c_node) + c2 = self._c_node.c2 + if c2: + node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl) + node._dtd = self._dtd + node._c_node = <tree.xmlElementContent*>c2 + return node + else: + return None + + +@cython.final +@cython.internal +@cython.freelist(8) +cdef class _DTDAttributeDecl: + cdef DTD _dtd + cdef tree.xmlAttribute* _c_node + + def __repr__(self): + return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.elemname, self.prefix, self.type, self.default, self.default_value, id(self)) + + @property + def name(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.name) + + @property + def elemname(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.elem) + + @property + def prefix(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.prefix) + + @property + def type(self): + _assertValidDTDNode(self, self._c_node) + cdef int type = self._c_node.atype + if type == tree.XML_ATTRIBUTE_CDATA: + return "cdata" + elif type == tree.XML_ATTRIBUTE_ID: + return "id" + elif type == tree.XML_ATTRIBUTE_IDREF: + return "idref" + elif type == tree.XML_ATTRIBUTE_IDREFS: + return "idrefs" + elif type == tree.XML_ATTRIBUTE_ENTITY: + return "entity" + elif type == tree.XML_ATTRIBUTE_ENTITIES: + return "entities" + elif type == tree.XML_ATTRIBUTE_NMTOKEN: + return "nmtoken" + elif type == tree.XML_ATTRIBUTE_NMTOKENS: + return "nmtokens" + elif type == tree.XML_ATTRIBUTE_ENUMERATION: + return "enumeration" + elif type == tree.XML_ATTRIBUTE_NOTATION: + return "notation" + else: + return None + + @property + def default(self): + _assertValidDTDNode(self, self._c_node) + cdef int default = self._c_node.def_ + if default == tree.XML_ATTRIBUTE_NONE: + return "none" + elif default == tree.XML_ATTRIBUTE_REQUIRED: + return "required" + elif default == tree.XML_ATTRIBUTE_IMPLIED: + return "implied" + elif default == tree.XML_ATTRIBUTE_FIXED: + return "fixed" + else: + return None + + @property + def default_value(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.defaultValue) + + def itervalues(self): + _assertValidDTDNode(self, self._c_node) + cdef tree.xmlEnumeration *c_node = self._c_node.tree + while c_node is not NULL: + yield funicode(c_node.name) + c_node = c_node.next + + def values(self): + return list(self.itervalues()) + + +@cython.final +@cython.internal +@cython.freelist(8) +cdef class _DTDElementDecl: + cdef DTD _dtd + cdef tree.xmlElement* _c_node + + def __repr__(self): + return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, self.prefix, self.type, id(self)) + + @property + def name(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.name) + + @property + def prefix(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.prefix) + + @property + def type(self): + _assertValidDTDNode(self, self._c_node) + cdef int type = self._c_node.etype + if type == tree.XML_ELEMENT_TYPE_UNDEFINED: + return "undefined" + elif type == tree.XML_ELEMENT_TYPE_EMPTY: + return "empty" + elif type == tree.XML_ELEMENT_TYPE_ANY: + return "any" + elif type == tree.XML_ELEMENT_TYPE_MIXED: + return "mixed" + elif type == tree.XML_ELEMENT_TYPE_ELEMENT: + return "element" + else: + return None + + @property + def content(self): + _assertValidDTDNode(self, self._c_node) + cdef tree.xmlElementContent *content = self._c_node.content + if content: + node = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl) + node._dtd = self._dtd + node._c_node = content + return node + else: + return None + + def iterattributes(self): + _assertValidDTDNode(self, self._c_node) + cdef tree.xmlAttribute *c_node = self._c_node.attributes + while c_node: + node = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl) + node._dtd = self._dtd + node._c_node = c_node + yield node + c_node = c_node.nexth + + def attributes(self): + return list(self.iterattributes()) + + +@cython.final +@cython.internal +@cython.freelist(8) +cdef class _DTDEntityDecl: + cdef DTD _dtd + cdef tree.xmlEntity* _c_node + def __repr__(self): + return "<%s.%s object name=%r at 0x%x>" % (self.__class__.__module__, self.__class__.__name__, self.name, id(self)) + + @property + def name(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.name) + + @property + def orig(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.orig) + + @property + def content(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.content) + + @property + def system_url(self): + _assertValidDTDNode(self, self._c_node) + return funicodeOrNone(self._c_node.SystemID) + + +################################################################################ +# DTD + +cdef class DTD(_Validator): + """DTD(self, file=None, external_id=None) + A DTD validator. + + Can load from filesystem directly given a filename or file-like object. + Alternatively, pass the keyword parameter ``external_id`` to load from a + catalog. + """ + cdef tree.xmlDtd* _c_dtd + def __init__(self, file=None, *, external_id=None): + _Validator.__init__(self) + if file is not None: + file = _getFSPathOrObject(file) + if _isString(file): + file = _encodeFilename(file) + with self._error_log: + orig_loader = _register_document_loader() + self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file)) + _reset_document_loader(orig_loader) + elif hasattr(file, 'read'): + orig_loader = _register_document_loader() + self._c_dtd = _parseDtdFromFilelike(file) + _reset_document_loader(orig_loader) + else: + raise DTDParseError, "file must be a filename, file-like or path-like object" + elif external_id is not None: + external_id_utf = _utf8(external_id) + with self._error_log: + orig_loader = _register_document_loader() + self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id_utf, NULL) + _reset_document_loader(orig_loader) + else: + raise DTDParseError, "either filename or external ID required" + + if self._c_dtd is NULL: + raise DTDParseError( + self._error_log._buildExceptionMessage("error parsing DTD"), + self._error_log) + + @property + def name(self): + if self._c_dtd is NULL: + return None + return funicodeOrNone(self._c_dtd.name) + + @property + def external_id(self): + if self._c_dtd is NULL: + return None + return funicodeOrNone(self._c_dtd.ExternalID) + + @property + def system_url(self): + if self._c_dtd is NULL: + return None + return funicodeOrNone(self._c_dtd.SystemID) + + def iterelements(self): + cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_DECL: + node = _DTDElementDecl() + node._dtd = self + node._c_node = <tree.xmlElement*>c_node + yield node + c_node = c_node.next + + def elements(self): + return list(self.iterelements()) + + def iterentities(self): + cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL + while c_node is not NULL: + if c_node.type == tree.XML_ENTITY_DECL: + node = _DTDEntityDecl() + node._dtd = self + node._c_node = <tree.xmlEntity*>c_node + yield node + c_node = c_node.next + + def entities(self): + return list(self.iterentities()) + + def __dealloc__(self): + tree.xmlFreeDtd(self._c_dtd) + + def __call__(self, etree): + """__call__(self, etree) + + Validate doc using the DTD. + + Returns true if the document is valid, false if not. + """ + cdef _Document doc + cdef _Element root_node + cdef xmlDoc* c_doc + cdef dtdvalid.xmlValidCtxt* valid_ctxt + cdef int ret = -1 + + assert self._c_dtd is not NULL, "DTD not initialised" + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + + valid_ctxt = dtdvalid.xmlNewValidCtxt() + if valid_ctxt is NULL: + raise DTDError("Failed to create validation context") + + # work around error reporting bug in libxml2 <= 2.9.1 (and later?) + # https://bugzilla.gnome.org/show_bug.cgi?id=724903 + valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc + valid_ctxt.userData = NULL + + try: + with self._error_log: + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd) + _destroyFakeDoc(doc._c_doc, c_doc) + finally: + dtdvalid.xmlFreeValidCtxt(valid_ctxt) + + if ret == -1: + raise DTDValidateError("Internal error in DTD validation", + self._error_log) + return ret == 1 + + +cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL: + cdef _ExceptionContext exc_context + cdef _FileReaderContext dtd_parser + cdef _ErrorLog error_log + cdef tree.xmlDtd* c_dtd = NULL + exc_context = _ExceptionContext() + dtd_parser = _FileReaderContext(file, exc_context, None) + error_log = _ErrorLog() + + with error_log: + c_dtd = dtd_parser._readDtd() + + exc_context._raise_if_stored() + if c_dtd is NULL: + raise DTDParseError("error parsing DTD", error_log) + return c_dtd + +cdef DTD _dtdFactory(tree.xmlDtd* c_dtd): + # do not run through DTD.__init__()! + cdef DTD dtd + if c_dtd is NULL: + return None + dtd = DTD.__new__(DTD) + dtd._c_dtd = _copyDtd(c_dtd) + _Validator.__init__(dtd) + return dtd + + +cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL: + """ + Copy a DTD. libxml2 (currently) fails to set up the element->attributes + links when copying DTDs, so we have to rebuild them here. + """ + c_dtd = tree.xmlCopyDtd(c_orig_dtd) + if not c_dtd: + raise MemoryError + cdef tree.xmlNode* c_node = c_dtd.children + while c_node: + if c_node.type == tree.XML_ATTRIBUTE_DECL: + _linkDtdAttribute(c_dtd, <tree.xmlAttribute*>c_node) + c_node = c_node.next + return c_dtd + + +cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr) noexcept: + """ + Create the link to the DTD attribute declaration from the corresponding + element declaration. + """ + c_elem = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem) + if not c_elem: + # no such element? something is wrong with the DTD ... + return + c_pos = c_elem.attributes + if not c_pos: + c_elem.attributes = c_attr + c_attr.nexth = NULL + return + # libxml2 keeps namespace declarations first, and we need to make + # sure we don't re-insert attributes that are already there + if _isDtdNsDecl(c_attr): + if not _isDtdNsDecl(c_pos): + c_elem.attributes = c_attr + c_attr.nexth = c_pos + return + while c_pos != c_attr and c_pos.nexth and _isDtdNsDecl(c_pos.nexth): + c_pos = c_pos.nexth + else: + # append at end + while c_pos != c_attr and c_pos.nexth: + c_pos = c_pos.nexth + if c_pos == c_attr: + return + c_attr.nexth = c_pos.nexth + c_pos.nexth = c_attr + + +cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr) noexcept: + if cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0: + return True + if (c_attr.prefix is not NULL and + cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0): + return True + return False diff --git a/.venv/lib/python3.12/site-packages/lxml/etree.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/lxml/etree.cpython-312-x86_64-linux-gnu.so Binary files differnew file mode 100755 index 00000000..29c19ddb --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/etree.cpython-312-x86_64-linux-gnu.so diff --git a/.venv/lib/python3.12/site-packages/lxml/etree.h b/.venv/lib/python3.12/site-packages/lxml/etree.h new file mode 100644 index 00000000..c2a0f5ab --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/etree.h @@ -0,0 +1,248 @@ +/* Generated by Cython 3.0.11 */ + +#ifndef __PYX_HAVE__lxml__etree +#define __PYX_HAVE__lxml__etree + +#include "Python.h" +struct LxmlDocument; +struct LxmlElement; +struct LxmlElementTree; +struct LxmlElementTagMatcher; +struct LxmlElementIterator; +struct LxmlElementBase; +struct LxmlElementClassLookup; +struct LxmlFallbackElementClassLookup; + +/* "lxml/etree.pyx":355 + * + * # type of a function that steps from node to node + * ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<< + * + * + */ +typedef xmlNode *(*_node_to_node_function)(xmlNode *); + +/* "lxml/etree.pyx":371 + * @cython.final + * @cython.freelist(8) + * cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<< + * """Internal base class to reference a libxml document. + * + */ +struct LxmlDocument { + PyObject_HEAD + struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab; + int _ns_counter; + PyObject *_prefix_tail; + xmlDoc *_c_doc; + struct __pyx_obj_4lxml_5etree__BaseParser *_parser; +}; + +/* "lxml/etree.pyx":720 + * + * @cython.no_gc_clear + * cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<< + * """Element class. + * + */ +struct LxmlElement { + PyObject_HEAD + struct LxmlDocument *_doc; + xmlNode *_c_node; + PyObject *_tag; +}; + +/* "lxml/etree.pyx":1895 + * + * + * cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<< + * object LxmlElementTree ]: + * cdef _Document _doc + */ +struct LxmlElementTree { + PyObject_HEAD + struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab; + struct LxmlDocument *_doc; + struct LxmlElement *_context_node; +}; + +/* "lxml/etree.pyx":2669 + * + * + * cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<< + * type LxmlElementTagMatcherType ]: + * """ + */ +struct LxmlElementTagMatcher { + PyObject_HEAD + struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab; + PyObject *_pystrings; + int _node_type; + char *_href; + char *_name; +}; + +/* "lxml/etree.pyx":2700 + * self._name = NULL + * + * cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<< + * object LxmlElementIterator, type LxmlElementIteratorType ]: + * """ + */ +struct LxmlElementIterator { + struct LxmlElementTagMatcher __pyx_base; + struct LxmlElement *_node; + _node_to_node_function _next_element; +}; + +/* "src/lxml/classlookup.pxi":6 + * # Custom Element classes + * + * cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<< + * object LxmlElementBase ]: + * """ElementBase(*children, attrib=None, nsmap=None, **_extra) + */ +struct LxmlElementBase { + struct LxmlElement __pyx_base; +}; + +/* "src/lxml/classlookup.pxi":210 + * # Element class lookup + * + * ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<< + * + * # class to store element class lookup functions + */ +typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *); + +/* "src/lxml/classlookup.pxi":213 + * + * # class to store element class lookup functions + * cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<< + * object LxmlElementClassLookup ]: + * """ElementClassLookup(self) + */ +struct LxmlElementClassLookup { + PyObject_HEAD + _element_class_lookup_function _lookup_function; +}; + +/* "src/lxml/classlookup.pxi":221 + * + * + * cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<< + * [ type LxmlFallbackElementClassLookupType, + * object LxmlFallbackElementClassLookup ]: + */ +struct LxmlFallbackElementClassLookup { + struct LxmlElementClassLookup __pyx_base; + struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab; + struct LxmlElementClassLookup *fallback; + _element_class_lookup_function _fallback_function; +}; + +#ifndef __PYX_HAVE_API__lxml__etree + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. + #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#ifndef DL_IMPORT + #define DL_IMPORT(_T) _T +#endif + +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType; + +__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *); +__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *); +__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *); +__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int); +__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *); +__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *); +__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *); +__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *); +__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *); +__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *); +__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *); +__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *); +__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *); +__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *); +__PYX_EXTERN_C int hasText(xmlNode *); +__PYX_EXTERN_C int hasTail(xmlNode *); +__PYX_EXTERN_C PyObject *textOf(xmlNode *); +__PYX_EXTERN_C PyObject *tailOf(xmlNode *); +__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *); +__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *); +__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *); +__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *); +__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *); +__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int); +__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int); +__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *); +__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *); +__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *); +__PYX_EXTERN_C int hasChild(xmlNode *); +__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t); +__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t); +__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t); +__PYX_EXTERN_C xmlNode *nextElement(xmlNode *); +__PYX_EXTERN_C xmlNode *previousElement(xmlNode *); +__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *); +__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *); +__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *); +__PYX_EXTERN_C PyObject *utf8(PyObject *); +__PYX_EXTERN_C PyObject *getNsTag(PyObject *); +__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *); +__PYX_EXTERN_C PyObject *namespacedName(xmlNode *); +__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *); +__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *); +__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *); +__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *); + +#endif /* !__PYX_HAVE_API__lxml__etree */ + +/* WARNING: the interface of the module init function changed in CPython 3.5. */ +/* It now returns a PyModuleDef instance instead of a PyModule instance. */ + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC initetree(void); +#else +/* WARNING: Use PyImport_AppendInittab("etree", PyInit_etree) instead of calling PyInit_etree directly from Python 3.5 */ +PyMODINIT_FUNC PyInit_etree(void); + +#if PY_VERSION_HEX >= 0x03050000 && (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201402L)) +#if defined(__cplusplus) && __cplusplus >= 201402L +[[deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")]] inline +#elif defined(__GNUC__) || defined(__clang__) +__attribute__ ((__deprecated__("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly."), __unused__)) __inline__ +#elif defined(_MSC_VER) +__declspec(deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")) __inline +#endif +static PyObject* __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyObject* res) { + return res; +} +#define PyInit_etree() __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyInit_etree()) +#endif +#endif + +#endif /* !__PYX_HAVE__lxml__etree */ diff --git a/.venv/lib/python3.12/site-packages/lxml/etree.pyx b/.venv/lib/python3.12/site-packages/lxml/etree.pyx new file mode 100644 index 00000000..f7da01c1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/etree.pyx @@ -0,0 +1,3732 @@ +# cython: binding=True +# cython: auto_pickle=False +# cython: language_level=3 + +""" +The ``lxml.etree`` module implements the extended ElementTree API for XML. +""" + +__docformat__ = "restructuredtext en" + +__all__ = [ + 'AttributeBasedElementClassLookup', 'C14NError', 'C14NWriterTarget', 'CDATA', + 'Comment', 'CommentBase', 'CustomElementClassLookup', 'DEBUG', + 'DTD', 'DTDError', 'DTDParseError', 'DTDValidateError', + 'DocumentInvalid', 'ETCompatXMLParser', 'ETXPath', 'Element', + 'ElementBase', 'ElementClassLookup', 'ElementDefaultClassLookup', + 'ElementNamespaceClassLookup', 'ElementTree', 'Entity', 'EntityBase', + 'Error', 'ErrorDomains', 'ErrorLevels', 'ErrorTypes', 'Extension', + 'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', 'HTMLParser', + 'ICONV_COMPILED_VERSION', + 'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION', + 'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION', + 'LXML_VERSION', + 'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError', + 'NamespaceRegistryError', 'PI', 'PIBase', 'ParseError', + 'ParserBasedElementClassLookup', 'ParserError', 'ProcessingInstruction', + 'PyErrorLog', 'PythonElementClassLookup', 'QName', 'RelaxNG', + 'RelaxNGError', 'RelaxNGErrorTypes', 'RelaxNGParseError', + 'RelaxNGValidateError', 'Resolver', 'Schematron', 'SchematronError', + 'SchematronParseError', 'SchematronValidateError', 'SerialisationError', + 'SubElement', 'TreeBuilder', 'XInclude', 'XIncludeError', 'XML', + 'XMLDTDID', 'XMLID', 'XMLParser', 'XMLSchema', 'XMLSchemaError', + 'XMLSchemaParseError', 'XMLSchemaValidateError', 'XMLSyntaxError', + 'XMLTreeBuilder', 'XPath', 'XPathDocumentEvaluator', 'XPathError', + 'XPathEvalError', 'XPathEvaluator', 'XPathFunctionError', 'XPathResultError', + 'XPathSyntaxError', 'XSLT', 'XSLTAccessControl', 'XSLTApplyError', + 'XSLTError', 'XSLTExtension', 'XSLTExtensionError', 'XSLTParseError', + 'XSLTSaveError', 'canonicalize', + 'cleanup_namespaces', 'clear_error_log', 'dump', + 'fromstring', 'fromstringlist', 'get_default_parser', 'iselement', + 'iterparse', 'iterwalk', 'parse', 'parseid', 'register_namespace', + 'set_default_parser', 'set_element_class_lookup', 'strip_attributes', + 'strip_elements', 'strip_tags', 'tostring', 'tostringlist', 'tounicode', + 'use_global_python_log' + ] + +cimport cython + +from lxml cimport python +from lxml.includes cimport tree, config +from lxml.includes.tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement, _getNs +from lxml.includes.tree cimport const_xmlChar, xmlChar, _xcstr +from lxml.python cimport _cstr, _isString +from lxml.includes cimport xpath +from lxml.includes cimport c14n + +# Cython's standard declarations +cimport cpython.mem +cimport cpython.ref +from libc cimport limits, stdio, stdlib +from libc cimport string as cstring_h # not to be confused with stdlib 'string' +from libc.string cimport const_char + +cdef object os_path_abspath +from os.path import abspath as os_path_abspath + +cdef object BytesIO, StringIO +from io import BytesIO, StringIO + +cdef object OrderedDict +from collections import OrderedDict + +cdef object _elementpath +from lxml import _elementpath + +cdef object sys +import sys + +cdef object re +import re + +cdef object partial +from functools import partial + +cdef object islice +from itertools import islice + +cdef object ITER_EMPTY = iter(()) + +cdef object MutableMapping +from collections.abc import MutableMapping + +class _ImmutableMapping(MutableMapping): + def __getitem__(self, key): + raise KeyError, key + + def __setitem__(self, key, value): + raise KeyError, key + + def __delitem__(self, key): + raise KeyError, key + + def __contains__(self, key): + return False + + def __len__(self): + return 0 + + def __iter__(self): + return ITER_EMPTY + iterkeys = itervalues = iteritems = __iter__ + +cdef object IMMUTABLE_EMPTY_MAPPING = _ImmutableMapping() +del _ImmutableMapping + + +# the rules +# --------- +# any libxml C argument/variable is prefixed with c_ +# any non-public function/class is prefixed with an underscore +# instance creation is always through factories + +# what to do with libxml2/libxslt error messages? +# 0 : drop +# 1 : use log +DEF __DEBUG = 1 + +# maximum number of lines in the libxml2/xslt log if __DEBUG == 1 +DEF __MAX_LOG_SIZE = 100 + +# make the compiled-in debug state publicly available +DEBUG = __DEBUG + +# A struct to store a cached qualified tag name+href pair. +# While we can borrow the c_name from the document dict, +# PyPy requires us to store a Python reference for the +# namespace in order to keep the byte buffer alive. +cdef struct qname: + const_xmlChar* c_name + python.PyObject* href + +# initialize parser (and threading) +xmlparser.xmlInitParser() + +# global per-thread setup +tree.xmlThrDefIndentTreeOutput(1) +tree.xmlThrDefLineNumbersDefaultValue(1) + +_initThreadLogging() + +# filename encoding +cdef bytes _FILENAME_ENCODING = (sys.getfilesystemencoding() or sys.getdefaultencoding() or 'ascii').encode("UTF-8") +cdef char* _C_FILENAME_ENCODING = _cstr(_FILENAME_ENCODING) + +# set up some default namespace prefixes +cdef dict _DEFAULT_NAMESPACE_PREFIXES = { + b"http://www.w3.org/XML/1998/namespace": b'xml', + b"http://www.w3.org/1999/xhtml": b"html", + b"http://www.w3.org/1999/XSL/Transform": b"xsl", + b"http://www.w3.org/1999/02/22-rdf-syntax-ns#": b"rdf", + b"http://schemas.xmlsoap.org/wsdl/": b"wsdl", + # xml schema + b"http://www.w3.org/2001/XMLSchema": b"xs", + b"http://www.w3.org/2001/XMLSchema-instance": b"xsi", + # dublin core + b"http://purl.org/dc/elements/1.1/": b"dc", + # objectify + b"http://codespeak.net/lxml/objectify/pytype" : b"py", +} + +# To avoid runtime encoding overhead, we keep a Unicode copy +# of the uri-prefix mapping as (str, str) items view. +cdef object _DEFAULT_NAMESPACE_PREFIXES_ITEMS = [] + +cdef _update_default_namespace_prefixes_items(): + cdef bytes ns, prefix + global _DEFAULT_NAMESPACE_PREFIXES_ITEMS + _DEFAULT_NAMESPACE_PREFIXES_ITEMS = { + ns.decode('utf-8') : prefix.decode('utf-8') + for ns, prefix in _DEFAULT_NAMESPACE_PREFIXES.items() + }.items() + +_update_default_namespace_prefixes_items() + +cdef object _check_internal_prefix = re.compile(br"ns\d+$").match + +def register_namespace(prefix, uri): + """Registers a namespace prefix that newly created Elements in that + namespace will use. The registry is global, and any existing + mapping for either the given prefix or the namespace URI will be + removed. + """ + prefix_utf, uri_utf = _utf8(prefix), _utf8(uri) + if _check_internal_prefix(prefix_utf): + raise ValueError("Prefix format reserved for internal use") + _tagValidOrRaise(prefix_utf) + _uriValidOrRaise(uri_utf) + if (uri_utf == b"http://www.w3.org/XML/1998/namespace" and prefix_utf != b'xml' + or prefix_utf == b'xml' and uri_utf != b"http://www.w3.org/XML/1998/namespace"): + raise ValueError("Cannot change the 'xml' prefix of the XML namespace") + for k, v in list(_DEFAULT_NAMESPACE_PREFIXES.items()): + if k == uri_utf or v == prefix_utf: + del _DEFAULT_NAMESPACE_PREFIXES[k] + _DEFAULT_NAMESPACE_PREFIXES[uri_utf] = prefix_utf + _update_default_namespace_prefixes_items() + + +# Error superclass for ElementTree compatibility +cdef class Error(Exception): + pass + +# module level superclass for all exceptions +cdef class LxmlError(Error): + """Main exception base class for lxml. All other exceptions inherit from + this one. + """ + def __init__(self, message, error_log=None): + super(_Error, self).__init__(message) + if error_log is None: + self.error_log = __copyGlobalErrorLog() + else: + self.error_log = error_log.copy() + +cdef object _Error = Error + + +# superclass for all syntax errors +class LxmlSyntaxError(LxmlError, SyntaxError): + """Base class for all syntax errors. + """ + +cdef class C14NError(LxmlError): + """Error during C14N serialisation. + """ + +# version information +cdef tuple __unpackDottedVersion(version): + version_list = [] + l = (version.decode("ascii").replace('-', '.').split('.') + [0]*4)[:4] + for item in l: + try: + item = int(item) + except ValueError: + if item.startswith('dev'): + count = item[3:] + item = -300 + elif item.startswith('alpha'): + count = item[5:] + item = -200 + elif item.startswith('beta'): + count = item[4:] + item = -100 + else: + count = 0 + if count: + item += int(count) + version_list.append(item) + return tuple(version_list) + +cdef tuple __unpackIntVersion(int c_version, int base=100): + return ( + ((c_version // (base*base)) % base), + ((c_version // base) % base), + (c_version % base) + ) + +cdef int _LIBXML_VERSION_INT +try: + _LIBXML_VERSION_INT = int( + re.match('[0-9]+', (<unsigned char*>tree.xmlParserVersion).decode("ascii")).group(0)) +except Exception: + print("Unknown libxml2 version: " + (<unsigned char*>tree.xmlParserVersion).decode("latin1")) + _LIBXML_VERSION_INT = 0 + +LIBXML_VERSION = __unpackIntVersion(_LIBXML_VERSION_INT) +LIBXML_COMPILED_VERSION = __unpackIntVersion(tree.LIBXML_VERSION) +LXML_VERSION = __unpackDottedVersion(tree.LXML_VERSION_STRING) + +__version__ = tree.LXML_VERSION_STRING.decode("ascii") + +cdef extern from *: + """ + #ifdef ZLIB_VERNUM + #define __lxml_zlib_version (ZLIB_VERNUM >> 4) + #else + #define __lxml_zlib_version 0 + #endif + #ifdef _LIBICONV_VERSION + #define __lxml_iconv_version (_LIBICONV_VERSION << 8) + #else + #define __lxml_iconv_version 0 + #endif + """ + # zlib isn't included automatically by libxml2's headers + #long ZLIB_HEX_VERSION "__lxml_zlib_version" + long LIBICONV_HEX_VERSION "__lxml_iconv_version" + +#ZLIB_COMPILED_VERSION = __unpackIntVersion(ZLIB_HEX_VERSION, base=0x10) +ICONV_COMPILED_VERSION = __unpackIntVersion(LIBICONV_HEX_VERSION, base=0x100)[:2] + + +# class for temporary storage of Python references, +# used e.g. for XPath results +@cython.final +@cython.internal +cdef class _TempStore: + cdef list _storage + def __init__(self): + self._storage = [] + + cdef int add(self, obj) except -1: + self._storage.append(obj) + return 0 + + cdef int clear(self) except -1: + del self._storage[:] + return 0 + + +# class for temporarily storing exceptions raised in extensions +@cython.internal +cdef class _ExceptionContext: + cdef object _exc_info + cdef int clear(self) except -1: + self._exc_info = None + return 0 + + cdef void _store_raised(self) noexcept: + try: + self._exc_info = sys.exc_info() + except BaseException as e: + self._store_exception(e) + finally: + return # and swallow any further exceptions + + cdef int _store_exception(self, exception) except -1: + self._exc_info = (exception, None, None) + return 0 + + cdef bint _has_raised(self) except -1: + return self._exc_info is not None + + cdef int _raise_if_stored(self) except -1: + if self._exc_info is None: + return 0 + type, value, traceback = self._exc_info + self._exc_info = None + if value is None and traceback is None: + raise type + else: + raise type, value, traceback + + +# type of a function that steps from node to node +ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) + + +################################################################################ +# Include submodules + +include "proxy.pxi" # Proxy handling (element backpointers/memory/etc.) +include "apihelpers.pxi" # Private helper functions +include "xmlerror.pxi" # Error and log handling + + +################################################################################ +# Public Python API + +@cython.final +@cython.freelist(8) +cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: + """Internal base class to reference a libxml document. + + When instances of this class are garbage collected, the libxml + document is cleaned up. + """ + cdef int _ns_counter + cdef bytes _prefix_tail + cdef xmlDoc* _c_doc + cdef _BaseParser _parser + + def __dealloc__(self): + # if there are no more references to the document, it is safe + # to clean the whole thing up, as all nodes have a reference to + # the document + tree.xmlFreeDoc(self._c_doc) + + @cython.final + cdef getroot(self): + # return an element proxy for the document root + cdef xmlNode* c_node + c_node = tree.xmlDocGetRootElement(self._c_doc) + if c_node is NULL: + return None + return _elementFactory(self, c_node) + + @cython.final + cdef bint hasdoctype(self) noexcept: + # DOCTYPE gets parsed into internal subset (xmlDTD*) + return self._c_doc is not NULL and self._c_doc.intSubset is not NULL + + @cython.final + cdef getdoctype(self): + # get doctype info: root tag, public/system ID (or None if not known) + cdef tree.xmlDtd* c_dtd + cdef xmlNode* c_root_node + public_id = None + sys_url = None + c_dtd = self._c_doc.intSubset + if c_dtd is not NULL: + if c_dtd.ExternalID is not NULL: + public_id = funicode(c_dtd.ExternalID) + if c_dtd.SystemID is not NULL: + sys_url = funicode(c_dtd.SystemID) + c_dtd = self._c_doc.extSubset + if c_dtd is not NULL: + if not public_id and c_dtd.ExternalID is not NULL: + public_id = funicode(c_dtd.ExternalID) + if not sys_url and c_dtd.SystemID is not NULL: + sys_url = funicode(c_dtd.SystemID) + c_root_node = tree.xmlDocGetRootElement(self._c_doc) + if c_root_node is NULL: + root_name = None + else: + root_name = funicode(c_root_node.name) + return root_name, public_id, sys_url + + @cython.final + cdef getxmlinfo(self): + # return XML version and encoding (or None if not known) + cdef xmlDoc* c_doc = self._c_doc + if c_doc.version is NULL: + version = None + else: + version = funicode(c_doc.version) + if c_doc.encoding is NULL: + encoding = None + else: + encoding = funicode(c_doc.encoding) + return version, encoding + + @cython.final + cdef isstandalone(self): + # returns True for "standalone=true", + # False for "standalone=false", None if not provided + if self._c_doc.standalone == -1: + return None + else: + return <bint>(self._c_doc.standalone == 1) + + @cython.final + cdef bytes buildNewPrefix(self): + # get a new unique prefix ("nsX") for this document + cdef bytes ns + if self._ns_counter < len(_PREFIX_CACHE): + ns = _PREFIX_CACHE[self._ns_counter] + else: + ns = python.PyBytes_FromFormat("ns%d", self._ns_counter) + if self._prefix_tail is not None: + ns += self._prefix_tail + self._ns_counter += 1 + if self._ns_counter < 0: + # overflow! + self._ns_counter = 0 + if self._prefix_tail is None: + self._prefix_tail = b"A" + else: + self._prefix_tail += b"A" + return ns + + @cython.final + cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, + const_xmlChar* c_href, const_xmlChar* c_prefix, + bint is_attribute) except NULL: + """Get or create namespace structure for a node. Reuses the prefix if + possible. + """ + cdef xmlNs* c_ns + cdef xmlNs* c_doc_ns + cdef python.PyObject* dict_result + if c_node.type != tree.XML_ELEMENT_NODE: + assert c_node.type == tree.XML_ELEMENT_NODE, \ + "invalid node type %d, expected %d" % ( + c_node.type, tree.XML_ELEMENT_NODE) + # look for existing ns declaration + c_ns = _searchNsByHref(c_node, c_href, is_attribute) + if c_ns is not NULL: + if is_attribute and c_ns.prefix is NULL: + # do not put namespaced attributes into the default + # namespace as this would break serialisation + pass + else: + return c_ns + + # none found => determine a suitable new prefix + if c_prefix is NULL: + dict_result = python.PyDict_GetItem( + _DEFAULT_NAMESPACE_PREFIXES, <unsigned char*>c_href) + if dict_result is not NULL: + prefix = <object>dict_result + else: + prefix = self.buildNewPrefix() + c_prefix = _xcstr(prefix) + + # make sure the prefix is not in use already + while tree.xmlSearchNs(self._c_doc, c_node, c_prefix) is not NULL: + prefix = self.buildNewPrefix() + c_prefix = _xcstr(prefix) + + # declare the namespace and return it + c_ns = tree.xmlNewNs(c_node, c_href, c_prefix) + if c_ns is NULL: + raise MemoryError() + return c_ns + + @cython.final + cdef int _setNodeNs(self, xmlNode* c_node, const_xmlChar* c_href) except -1: + "Lookup namespace structure and set it for the node." + c_ns = self._findOrBuildNodeNs(c_node, c_href, NULL, 0) + tree.xmlSetNs(c_node, c_ns) + +cdef tuple __initPrefixCache(): + cdef int i + return tuple([ python.PyBytes_FromFormat("ns%d", i) + for i in range(30) ]) + +cdef tuple _PREFIX_CACHE = __initPrefixCache() + +cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser): + cdef _Document result + result = _Document.__new__(_Document) + result._c_doc = c_doc + result._ns_counter = 0 + result._prefix_tail = None + if parser is None: + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + result._parser = parser + return result + + +cdef object _find_invalid_public_id_characters = re.compile( + ur"[^\x20\x0D\x0Aa-zA-Z0-9'()+,./:=?;!*#@$_%-]+").search + + +cdef class DocInfo: + "Document information provided by parser and DTD." + cdef _Document _doc + def __cinit__(self, tree): + "Create a DocInfo object for an ElementTree object or root Element." + self._doc = _documentOrRaise(tree) + root_name, public_id, system_url = self._doc.getdoctype() + if not root_name and (public_id or system_url): + raise ValueError, "Could not find root node" + + @property + def root_name(self): + """Returns the name of the root node as defined by the DOCTYPE.""" + root_name, public_id, system_url = self._doc.getdoctype() + return root_name + + @cython.final + cdef tree.xmlDtd* _get_c_dtd(self): + """"Return the DTD. Create it if it does not yet exist.""" + cdef xmlDoc* c_doc = self._doc._c_doc + cdef xmlNode* c_root_node + cdef const_xmlChar* c_name + + if c_doc.intSubset: + return c_doc.intSubset + + c_root_node = tree.xmlDocGetRootElement(c_doc) + c_name = c_root_node.name if c_root_node else NULL + return tree.xmlCreateIntSubset(c_doc, c_name, NULL, NULL) + + def clear(self): + """Removes DOCTYPE and internal subset from the document.""" + cdef xmlDoc* c_doc = self._doc._c_doc + cdef tree.xmlNode* c_dtd = <xmlNode*>c_doc.intSubset + if c_dtd is NULL: + return + tree.xmlUnlinkNode(c_dtd) + tree.xmlFreeNode(c_dtd) + + property public_id: + """Public ID of the DOCTYPE. + + Mutable. May be set to a valid string or None. If a DTD does not + exist, setting this variable (even to None) will create one. + """ + def __get__(self): + root_name, public_id, system_url = self._doc.getdoctype() + return public_id + + def __set__(self, value): + cdef xmlChar* c_value = NULL + if value is not None: + match = _find_invalid_public_id_characters(value) + if match: + raise ValueError, f'Invalid character(s) {match.group(0)!r} in public_id.' + value = _utf8(value) + c_value = tree.xmlStrdup(_xcstr(value)) + if not c_value: + raise MemoryError() + + c_dtd = self._get_c_dtd() + if not c_dtd: + tree.xmlFree(c_value) + raise MemoryError() + if c_dtd.ExternalID: + tree.xmlFree(<void*>c_dtd.ExternalID) + c_dtd.ExternalID = c_value + + property system_url: + """System ID of the DOCTYPE. + + Mutable. May be set to a valid string or None. If a DTD does not + exist, setting this variable (even to None) will create one. + """ + def __get__(self): + root_name, public_id, system_url = self._doc.getdoctype() + return system_url + + def __set__(self, value): + cdef xmlChar* c_value = NULL + if value is not None: + bvalue = _utf8(value) + # sys_url may be any valid unicode string that can be + # enclosed in single quotes or quotes. + if b"'" in bvalue and b'"' in bvalue: + raise ValueError( + 'System URL may not contain both single (\') and double quotes (").') + c_value = tree.xmlStrdup(_xcstr(bvalue)) + if not c_value: + raise MemoryError() + + c_dtd = self._get_c_dtd() + if not c_dtd: + tree.xmlFree(c_value) + raise MemoryError() + if c_dtd.SystemID: + tree.xmlFree(<void*>c_dtd.SystemID) + c_dtd.SystemID = c_value + + @property + def xml_version(self): + """Returns the XML version as declared by the document.""" + xml_version, encoding = self._doc.getxmlinfo() + return xml_version + + @property + def encoding(self): + """Returns the encoding name as declared by the document.""" + xml_version, encoding = self._doc.getxmlinfo() + return encoding + + @property + def standalone(self): + """Returns the standalone flag as declared by the document. The possible + values are True (``standalone='yes'``), False + (``standalone='no'`` or flag not provided in the declaration), + and None (unknown or no declaration found). Note that a + normal truth test on this value will always tell if the + ``standalone`` flag was set to ``'yes'`` or not. + """ + return self._doc.isstandalone() + + property URL: + "The source URL of the document (or None if unknown)." + def __get__(self): + if self._doc._c_doc.URL is NULL: + return None + return _decodeFilename(self._doc._c_doc.URL) + def __set__(self, url): + url = _encodeFilename(url) + c_oldurl = self._doc._c_doc.URL + if url is None: + self._doc._c_doc.URL = NULL + else: + self._doc._c_doc.URL = tree.xmlStrdup(_xcstr(url)) + if c_oldurl is not NULL: + tree.xmlFree(<void*>c_oldurl) + + @property + def doctype(self): + """Returns a DOCTYPE declaration string for the document.""" + root_name, public_id, system_url = self._doc.getdoctype() + if system_url: + # If '"' in system_url, we must escape it with single + # quotes, otherwise escape with double quotes. If url + # contains both a single quote and a double quote, XML + # standard is being violated. + if '"' in system_url: + quoted_system_url = f"'{system_url}'" + else: + quoted_system_url = f'"{system_url}"' + if public_id: + if system_url: + return f'<!DOCTYPE {root_name} PUBLIC "{public_id}" {quoted_system_url}>' + else: + return f'<!DOCTYPE {root_name} PUBLIC "{public_id}">' + elif system_url: + return f'<!DOCTYPE {root_name} SYSTEM {quoted_system_url}>' + elif self._doc.hasdoctype(): + return f'<!DOCTYPE {root_name}>' + else: + return '' + + @property + def internalDTD(self): + """Returns a DTD validator based on the internal subset of the document.""" + return _dtdFactory(self._doc._c_doc.intSubset) + + @property + def externalDTD(self): + """Returns a DTD validator based on the external subset of the document.""" + return _dtdFactory(self._doc._c_doc.extSubset) + + +@cython.no_gc_clear +cdef public class _Element [ type LxmlElementType, object LxmlElement ]: + """Element class. + + References a document object and a libxml node. + + By pointing to a Document instance, a reference is kept to + _Document as long as there is some pointer to a node in it. + """ + cdef _Document _doc + cdef xmlNode* _c_node + cdef object _tag + + def _init(self): + """_init(self) + + Called after object initialisation. Custom subclasses may override + this if they recursively call _init() in the superclasses. + """ + + @cython.linetrace(False) + @cython.profile(False) + def __dealloc__(self): + #print("trying to free node:", <int>self._c_node) + #displayNode(self._c_node, 0) + if self._c_node is not NULL: + _unregisterProxy(self) + attemptDeallocation(self._c_node) + + # MANIPULATORS + + def __setitem__(self, x, value): + """__setitem__(self, x, value) + + Replaces the given subelement index or slice. + """ + cdef xmlNode* c_node = NULL + cdef xmlNode* c_next + cdef xmlDoc* c_source_doc + cdef _Element element + cdef bint left_to_right + cdef Py_ssize_t slicelength = 0, step = 0 + _assertValidNode(self) + if value is None: + raise ValueError, "cannot assign None" + if isinstance(x, slice): + # slice assignment + _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength) + if step > 0: + left_to_right = 1 + else: + left_to_right = 0 + step = -step + _replaceSlice(self, c_node, slicelength, step, left_to_right, value) + return + else: + # otherwise: normal item assignment + element = value + _assertValidNode(element) + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, "list index out of range" + c_source_doc = element._c_node.doc + c_next = element._c_node.next + _removeText(c_node.next) + tree.xmlReplaceNode(c_node, element._c_node) + _moveTail(c_next, element._c_node) + moveNodeToDocument(self._doc, c_source_doc, element._c_node) + if not attemptDeallocation(c_node): + moveNodeToDocument(self._doc, c_node.doc, c_node) + + def __delitem__(self, x): + """__delitem__(self, x) + + Deletes the given subelement or a slice. + """ + cdef xmlNode* c_node = NULL + cdef xmlNode* c_next + cdef Py_ssize_t step = 0, slicelength = 0 + _assertValidNode(self) + if isinstance(x, slice): + # slice deletion + if _isFullSlice(<slice>x): + c_node = self._c_node.children + if c_node is not NULL: + if not _isElement(c_node): + c_node = _nextElement(c_node) + while c_node is not NULL: + c_next = _nextElement(c_node) + _removeNode(self._doc, c_node) + c_node = c_next + else: + _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength) + _deleteSlice(self._doc, c_node, slicelength, step) + else: + # item deletion + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, f"index out of range: {x}" + _removeNode(self._doc, c_node) + + def __deepcopy__(self, memo): + "__deepcopy__(self, memo)" + return self.__copy__() + + def __copy__(self): + "__copy__(self)" + cdef xmlDoc* c_doc + cdef xmlNode* c_node + cdef _Document new_doc + _assertValidNode(self) + c_doc = _copyDocRoot(self._doc._c_doc, self._c_node) # recursive + new_doc = _documentFactory(c_doc, self._doc._parser) + root = new_doc.getroot() + if root is not None: + return root + # Comment/PI + c_node = c_doc.children + while c_node is not NULL and c_node.type != self._c_node.type: + c_node = c_node.next + if c_node is NULL: + return None + return _elementFactory(new_doc, c_node) + + def set(self, key, value): + """set(self, key, value) + + Sets an element attribute. + In HTML documents (not XML or XHTML), the value None is allowed and creates + an attribute without value (just the attribute name). + """ + _assertValidNode(self) + _setAttributeValue(self, key, value) + + def append(self, _Element element not None): + """append(self, element) + + Adds a subelement to the end of this element. + """ + _assertValidNode(self) + _assertValidNode(element) + _appendChild(self, element) + + def addnext(self, _Element element not None): + """addnext(self, element) + + Adds the element as a following sibling directly after this + element. + + This is normally used to set a processing instruction or comment after + the root node of a document. Note that tail text is automatically + discarded when adding at the root level. + """ + _assertValidNode(self) + _assertValidNode(element) + if self._c_node.parent != NULL and not _isElement(self._c_node.parent): + if element._c_node.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE): + raise TypeError, "Only processing instructions and comments can be siblings of the root element" + element.tail = None + _appendSibling(self, element) + + def addprevious(self, _Element element not None): + """addprevious(self, element) + + Adds the element as a preceding sibling directly before this + element. + + This is normally used to set a processing instruction or comment + before the root node of a document. Note that tail text is + automatically discarded when adding at the root level. + """ + _assertValidNode(self) + _assertValidNode(element) + if self._c_node.parent != NULL and not _isElement(self._c_node.parent): + if element._c_node.type != tree.XML_PI_NODE: + if element._c_node.type != tree.XML_COMMENT_NODE: + raise TypeError, "Only processing instructions and comments can be siblings of the root element" + element.tail = None + _prependSibling(self, element) + + def extend(self, elements): + """extend(self, elements) + + Extends the current children by the elements in the iterable. + """ + cdef _Element element + _assertValidNode(self) + for element in elements: + if element is None: + raise TypeError, "Node must not be None" + _assertValidNode(element) + _appendChild(self, element) + + def clear(self, bint keep_tail=False): + """clear(self, keep_tail=False) + + Resets an element. This function removes all subelements, clears + all attributes and sets the text and tail properties to None. + + Pass ``keep_tail=True`` to leave the tail text untouched. + """ + cdef xmlAttr* c_attr + cdef xmlAttr* c_attr_next + cdef xmlNode* c_node + cdef xmlNode* c_node_next + _assertValidNode(self) + c_node = self._c_node + # remove self.text and self.tail + _removeText(c_node.children) + if not keep_tail: + _removeText(c_node.next) + # remove all attributes + c_attr = c_node.properties + if c_attr: + c_node.properties = NULL + tree.xmlFreePropList(c_attr) + # remove all subelements + c_node = c_node.children + if c_node and not _isElement(c_node): + c_node = _nextElement(c_node) + while c_node is not NULL: + c_node_next = _nextElement(c_node) + _removeNode(self._doc, c_node) + c_node = c_node_next + + def insert(self, index: int, _Element element not None): + """insert(self, index, element) + + Inserts a subelement at the given position in this element + """ + cdef xmlNode* c_node + cdef xmlNode* c_next + cdef xmlDoc* c_source_doc + _assertValidNode(self) + _assertValidNode(element) + c_node = _findChild(self._c_node, index) + if c_node is NULL: + _appendChild(self, element) + return + # prevent cycles + if _isAncestorOrSame(element._c_node, self._c_node): + raise ValueError("cannot append parent to itself") + c_source_doc = element._c_node.doc + c_next = element._c_node.next + tree.xmlAddPrevSibling(c_node, element._c_node) + _moveTail(c_next, element._c_node) + moveNodeToDocument(self._doc, c_source_doc, element._c_node) + + def remove(self, _Element element not None): + """remove(self, element) + + Removes a matching subelement. Unlike the find methods, this + method compares elements based on identity, not on tag value + or contents. + """ + cdef xmlNode* c_node + cdef xmlNode* c_next + _assertValidNode(self) + _assertValidNode(element) + c_node = element._c_node + if c_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." + c_next = element._c_node.next + tree.xmlUnlinkNode(c_node) + _moveTail(c_next, c_node) + # fix namespace declarations + moveNodeToDocument(self._doc, c_node.doc, c_node) + + def replace(self, _Element old_element not None, + _Element new_element not None): + """replace(self, old_element, new_element) + + Replaces a subelement with the element passed as second argument. + """ + cdef xmlNode* c_old_node + cdef xmlNode* c_old_next + cdef xmlNode* c_new_node + cdef xmlNode* c_new_next + cdef xmlDoc* c_source_doc + _assertValidNode(self) + _assertValidNode(old_element) + _assertValidNode(new_element) + c_old_node = old_element._c_node + if c_old_node.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." + c_new_node = new_element._c_node + # prevent cycles + if _isAncestorOrSame(c_new_node, self._c_node): + raise ValueError("cannot append parent to itself") + # replace node + c_old_next = c_old_node.next + c_new_next = c_new_node.next + c_source_doc = c_new_node.doc + tree.xmlReplaceNode(c_old_node, c_new_node) + _moveTail(c_new_next, c_new_node) + _moveTail(c_old_next, c_old_node) + moveNodeToDocument(self._doc, c_source_doc, c_new_node) + # fix namespace declarations + moveNodeToDocument(self._doc, c_old_node.doc, c_old_node) + + # PROPERTIES + property tag: + """Element tag + """ + def __get__(self): + if self._tag is not None: + return self._tag + _assertValidNode(self) + self._tag = _namespacedName(self._c_node) + return self._tag + + def __set__(self, value): + cdef _BaseParser parser + _assertValidNode(self) + ns, name = _getNsTag(value) + parser = self._doc._parser + if parser is not None and parser._for_html: + _htmlTagValidOrRaise(name) + else: + _tagValidOrRaise(name) + self._tag = value + tree.xmlNodeSetName(self._c_node, _xcstr(name)) + if ns is None: + self._c_node.ns = NULL + else: + self._doc._setNodeNs(self._c_node, _xcstr(ns)) + + @property + def attrib(self): + """Element attribute dictionary. Where possible, use get(), set(), + keys(), values() and items() to access element attributes. + """ + return _Attrib.__new__(_Attrib, self) + + property text: + """Text before the first subelement. This is either a string or + the value None, if there was no text. + """ + def __get__(self): + _assertValidNode(self) + return _collectText(self._c_node.children) + + def __set__(self, value): + _assertValidNode(self) + if isinstance(value, QName): + value = _resolveQNameText(self, value).decode('utf8') + _setNodeText(self._c_node, value) + + # using 'del el.text' is the wrong thing to do + #def __del__(self): + # _setNodeText(self._c_node, None) + + property tail: + """Text after this element's end tag, but before the next sibling + element's start tag. This is either a string or the value None, if + there was no text. + """ + def __get__(self): + _assertValidNode(self) + return _collectText(self._c_node.next) + + def __set__(self, value): + _assertValidNode(self) + _setTailText(self._c_node, value) + + # using 'del el.tail' is the wrong thing to do + #def __del__(self): + # _setTailText(self._c_node, None) + + # not in ElementTree, read-only + @property + def prefix(self): + """Namespace prefix or None. + """ + if self._c_node.ns is not NULL: + if self._c_node.ns.prefix is not NULL: + return funicode(self._c_node.ns.prefix) + return None + + # not in ElementTree, read-only + property sourceline: + """Original line number as found by the parser or None if unknown. + """ + def __get__(self): + cdef long line + _assertValidNode(self) + line = tree.xmlGetLineNo(self._c_node) + return line if line > 0 else None + + def __set__(self, line): + _assertValidNode(self) + if line <= 0: + self._c_node.line = 0 + else: + self._c_node.line = line + + # not in ElementTree, read-only + @property + def nsmap(self): + """Namespace prefix->URI mapping known in the context of this + Element. This includes all namespace declarations of the + parents. + + Note that changing the returned dict has no effect on the Element. + """ + _assertValidNode(self) + return _build_nsmap(self._c_node) + + # not in ElementTree, read-only + property base: + """The base URI of the Element (xml:base or HTML base URL). + None if the base URI is unknown. + + Note that the value depends on the URL of the document that + holds the Element if there is no xml:base attribute on the + Element or its ancestors. + + Setting this property will set an xml:base attribute on the + Element, regardless of the document type (XML or HTML). + """ + def __get__(self): + _assertValidNode(self) + c_base = tree.xmlNodeGetBase(self._doc._c_doc, self._c_node) + if c_base is NULL: + if self._doc._c_doc.URL is NULL: + return None + return _decodeFilename(self._doc._c_doc.URL) + try: + base = _decodeFilename(c_base) + finally: + tree.xmlFree(c_base) + return base + + def __set__(self, url): + _assertValidNode(self) + if url is None: + c_base = <const_xmlChar*>NULL + else: + url = _encodeFilename(url) + c_base = _xcstr(url) + tree.xmlNodeSetBase(self._c_node, c_base) + + # ACCESSORS + def __repr__(self): + "__repr__(self)" + return "<Element %s at 0x%x>" % (self.tag, id(self)) + + def __getitem__(self, x): + """Returns the subelement at the given position or the requested + slice. + """ + cdef xmlNode* c_node = NULL + cdef Py_ssize_t step = 0, slicelength = 0 + cdef Py_ssize_t c, i + cdef _node_to_node_function next_element + cdef list result + _assertValidNode(self) + if isinstance(x, slice): + # slicing + if _isFullSlice(<slice>x): + return _collectChildren(self) + _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength) + if c_node is NULL: + return [] + if step > 0: + next_element = _nextElement + else: + step = -step + next_element = _previousElement + result = [] + c = 0 + while c_node is not NULL and c < slicelength: + result.append(_elementFactory(self._doc, c_node)) + c += 1 + for i in range(step): + c_node = next_element(c_node) + if c_node is NULL: + break + return result + else: + # indexing + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, "list index out of range" + return _elementFactory(self._doc, c_node) + + def __len__(self): + """__len__(self) + + Returns the number of subelements. + """ + _assertValidNode(self) + return _countElements(self._c_node.children) + + def __bool__(self): + """__bool__(self)""" + import warnings + warnings.warn( + "Truth-testing of elements was a source of confusion and will always " + "return True in future versions. " + "Use specific 'len(elem)' or 'elem is not None' test instead.", + FutureWarning + ) + # emulate old behaviour + _assertValidNode(self) + return _hasChild(self._c_node) + + def __contains__(self, element): + "__contains__(self, element)" + cdef xmlNode* c_node + _assertValidNode(self) + if not isinstance(element, _Element): + return 0 + c_node = (<_Element>element)._c_node + return c_node is not NULL and c_node.parent is self._c_node + + def __iter__(self): + "__iter__(self)" + return ElementChildIterator(self) + + def __reversed__(self): + "__reversed__(self)" + return ElementChildIterator(self, reversed=True) + + def index(self, child: _Element, start: int = None, stop: int = None): + """index(self, child, start=None, stop=None) + + Find the position of the child within the parent. + + This method is not part of the original ElementTree API. + """ + cdef Py_ssize_t k, l + cdef Py_ssize_t c_start, c_stop + cdef xmlNode* c_child + cdef xmlNode* c_start_node + _assertValidNode(self) + _assertValidNode(child) + c_child = child._c_node + if c_child.parent is not self._c_node: + raise ValueError, "Element is not a child of this node." + + # handle the unbounded search straight away (normal case) + if stop is None and (start is None or start == 0): + k = 0 + c_child = c_child.prev + while c_child is not NULL: + if _isElement(c_child): + k += 1 + c_child = c_child.prev + return k + + # check indices + if start is None: + c_start = 0 + else: + c_start = start + if stop is None: + c_stop = 0 + else: + c_stop = stop + if c_stop == 0 or \ + c_start >= c_stop and (c_stop > 0 or c_start < 0): + raise ValueError, "list.index(x): x not in slice" + + # for negative slice indices, check slice before searching index + if c_start < 0 or c_stop < 0: + # start from right, at most up to leftmost(c_start, c_stop) + if c_start < c_stop: + k = -c_start + else: + k = -c_stop + c_start_node = self._c_node.last + l = 1 + while c_start_node != c_child and l < k: + if _isElement(c_start_node): + l += 1 + c_start_node = c_start_node.prev + if c_start_node == c_child: + # found! before slice end? + if c_stop < 0 and l <= -c_stop: + raise ValueError, "list.index(x): x not in slice" + elif c_start < 0: + raise ValueError, "list.index(x): x not in slice" + + # now determine the index backwards from child + c_child = c_child.prev + k = 0 + if c_stop > 0: + # we can optimize: stop after c_stop elements if not found + while c_child != NULL and k < c_stop: + if _isElement(c_child): + k += 1 + c_child = c_child.prev + if k < c_stop: + return k + else: + # traverse all + while c_child != NULL: + if _isElement(c_child): + k = k + 1 + c_child = c_child.prev + if c_start > 0: + if k >= c_start: + return k + else: + return k + if c_start != 0 or c_stop != 0: + raise ValueError, "list.index(x): x not in slice" + else: + raise ValueError, "list.index(x): x not in list" + + def get(self, key, default=None): + """get(self, key, default=None) + + Gets an element attribute. + """ + _assertValidNode(self) + return _getAttributeValue(self, key, default) + + def keys(self): + """keys(self) + + Gets a list of attribute names. The names are returned in an + arbitrary order (just like for an ordinary Python dictionary). + """ + _assertValidNode(self) + return _collectAttributes(self._c_node, 1) + + def values(self): + """values(self) + + Gets element attribute values as a sequence of strings. The + attributes are returned in an arbitrary order. + """ + _assertValidNode(self) + return _collectAttributes(self._c_node, 2) + + def items(self): + """items(self) + + Gets element attributes, as a sequence. The attributes are returned in + an arbitrary order. + """ + _assertValidNode(self) + return _collectAttributes(self._c_node, 3) + + def getchildren(self): + """getchildren(self) + + Returns all direct children. The elements are returned in document + order. + + :deprecated: Note that this method has been deprecated as of + ElementTree 1.3 and lxml 2.0. New code should use + ``list(element)`` or simply iterate over elements. + """ + _assertValidNode(self) + return _collectChildren(self) + + def getparent(self): + """getparent(self) + + Returns the parent of this element or None for the root element. + """ + cdef xmlNode* c_node + #_assertValidNode(self) # not needed + c_node = _parentElement(self._c_node) + if c_node is NULL: + return None + return _elementFactory(self._doc, c_node) + + def getnext(self): + """getnext(self) + + Returns the following sibling of this element or None. + """ + cdef xmlNode* c_node + #_assertValidNode(self) # not needed + c_node = _nextElement(self._c_node) + if c_node is NULL: + return None + return _elementFactory(self._doc, c_node) + + def getprevious(self): + """getprevious(self) + + Returns the preceding sibling of this element or None. + """ + cdef xmlNode* c_node + #_assertValidNode(self) # not needed + c_node = _previousElement(self._c_node) + if c_node is NULL: + return None + return _elementFactory(self._doc, c_node) + + def itersiblings(self, tag=None, *tags, preceding=False): + """itersiblings(self, tag=None, *tags, preceding=False) + + Iterate over the following or preceding siblings of this element. + + The direction is determined by the 'preceding' keyword which + defaults to False, i.e. forward iteration over the following + siblings. When True, the iterator yields the preceding + siblings in reverse document order, i.e. starting right before + the current element and going backwards. + + Can be restricted to find only elements with specific tags, + see `iter`. + """ + if preceding: + if self._c_node and not self._c_node.prev: + return ITER_EMPTY + elif self._c_node and not self._c_node.next: + return ITER_EMPTY + if tag is not None: + tags += (tag,) + return SiblingsIterator(self, tags, preceding=preceding) + + def iterancestors(self, tag=None, *tags): + """iterancestors(self, tag=None, *tags) + + Iterate over the ancestors of this element (from parent to parent). + + Can be restricted to find only elements with specific tags, + see `iter`. + """ + if self._c_node and not self._c_node.parent: + return ITER_EMPTY + if tag is not None: + tags += (tag,) + return AncestorsIterator(self, tags) + + def iterdescendants(self, tag=None, *tags): + """iterdescendants(self, tag=None, *tags) + + Iterate over the descendants of this element in document order. + + As opposed to ``el.iter()``, this iterator does not yield the element + itself. The returned elements can be restricted to find only elements + with specific tags, see `iter`. + """ + if self._c_node and not self._c_node.children: + return ITER_EMPTY + if tag is not None: + tags += (tag,) + return ElementDepthFirstIterator(self, tags, inclusive=False) + + def iterchildren(self, tag=None, *tags, reversed=False): + """iterchildren(self, tag=None, *tags, reversed=False) + + Iterate over the children of this element. + + As opposed to using normal iteration on this element, the returned + elements can be reversed with the 'reversed' keyword and restricted + to find only elements with specific tags, see `iter`. + """ + if self._c_node and not self._c_node.children: + return ITER_EMPTY + if tag is not None: + tags += (tag,) + return ElementChildIterator(self, tags, reversed=reversed) + + def getroottree(self): + """getroottree(self) + + Return an ElementTree for the root node of the document that + contains this element. + + This is the same as following element.getparent() up the tree until it + returns None (for the root element) and then build an ElementTree for + the last parent that was returned.""" + _assertValidDoc(self._doc) + return _elementTreeFactory(self._doc, None) + + def getiterator(self, tag=None, *tags): + """getiterator(self, tag=None, *tags) + + Returns a sequence or iterator of all elements in the subtree in + document order (depth first pre-order), starting with this + element. + + Can be restricted to find only elements with specific tags, + see `iter`. + + :deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in + lxml, which diverges from the original ElementTree + behaviour. If you want an efficient iterator, use the + ``element.iter()`` method instead. You should only use this + method in new code if you require backwards compatibility + with older versions of lxml or ElementTree. + """ + if tag is not None: + tags += (tag,) + return ElementDepthFirstIterator(self, tags) + + def iter(self, tag=None, *tags): + """iter(self, tag=None, *tags) + + Iterate over all elements in the subtree in document order (depth + first pre-order), starting with this element. + + Can be restricted to find only elements with specific tags: + pass ``"{ns}localname"`` as tag. Either or both of ``ns`` and + ``localname`` can be ``*`` for a wildcard; ``ns`` can be empty + for no namespace. ``"localname"`` is equivalent to ``"{}localname"`` + (i.e. no namespace) but ``"*"`` is ``"{*}*"`` (any or no namespace), + not ``"{}*"``. + + You can also pass the Element, Comment, ProcessingInstruction and + Entity factory functions to look only for the specific element type. + + Passing multiple tags (or a sequence of tags) instead of a single tag + will let the iterator return all elements matching any of these tags, + in document order. + """ + if tag is not None: + tags += (tag,) + return ElementDepthFirstIterator(self, tags) + + def itertext(self, tag=None, *tags, with_tail=True): + """itertext(self, tag=None, *tags, with_tail=True) + + Iterates over the text content of a subtree. + + You can pass tag names to restrict text content to specific elements, + see `iter`. + + You can set the ``with_tail`` keyword argument to ``False`` to skip + over tail text. + """ + if tag is not None: + tags += (tag,) + return ElementTextIterator(self, tags, with_tail=with_tail) + + def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): + """makeelement(self, _tag, attrib=None, nsmap=None, **_extra) + + Creates a new element associated with the same document. + """ + _assertValidDoc(self._doc) + return _makeElement(_tag, NULL, self._doc, None, None, None, + attrib, nsmap, _extra) + + def find(self, path, namespaces=None): + """find(self, path, namespaces=None) + + Finds the first matching subelement, by tag name or path. + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + if isinstance(path, QName): + path = (<QName>path).text + return _elementpath.find(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def findtext(self, path, default=None, namespaces=None): + """findtext(self, path, default=None, namespaces=None) + + Finds text for the first matching subelement, by tag name or path. + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + if isinstance(path, QName): + path = (<QName>path).text + return _elementpath.findtext(self, path, default, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def findall(self, path, namespaces=None): + """findall(self, path, namespaces=None) + + Finds all matching subelements, by tag name or path. + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + if isinstance(path, QName): + path = (<QName>path).text + return _elementpath.findall(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def iterfind(self, path, namespaces=None): + """iterfind(self, path, namespaces=None) + + Iterates over all matching subelements, by tag name or path. + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + if isinstance(path, QName): + path = (<QName>path).text + return _elementpath.iterfind(self, path, namespaces, with_prefixes=not _isHtmlDocument(self)) + + def xpath(self, _path, *, namespaces=None, extensions=None, + smart_strings=True, **_variables): + """xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables) + + Evaluate an xpath expression using the element as context node. + """ + evaluator = XPathElementEvaluator(self, namespaces=namespaces, + extensions=extensions, + smart_strings=smart_strings) + return evaluator(_path, **_variables) + + def cssselect(self, expr, *, translator='xml'): + """ + Run the CSS expression on this element and its children, + returning a list of the results. + + Equivalent to lxml.cssselect.CSSSelect(expr)(self) -- note + that pre-compiling the expression can provide a substantial + speedup. + """ + # Do the import here to make the dependency optional. + from lxml.cssselect import CSSSelector + return CSSSelector(expr, translator=translator)(self) + + +cdef extern from "includes/etree_defs.h": + # macro call to 't->tp_new()' for fast instantiation + cdef object NEW_ELEMENT "PY_NEW" (object t) + + +@cython.linetrace(False) +cdef _Element _elementFactory(_Document doc, xmlNode* c_node): + cdef _Element result + result = getProxy(c_node) + if result is not None: + return result + if c_node is NULL: + return None + + element_class = LOOKUP_ELEMENT_CLASS( + ELEMENT_CLASS_LOOKUP_STATE, doc, c_node) + if hasProxy(c_node): + # prevent re-entry race condition - we just called into Python + return getProxy(c_node) + result = NEW_ELEMENT(element_class) + if hasProxy(c_node): + # prevent re-entry race condition - we just called into Python + result._c_node = NULL + return getProxy(c_node) + + _registerProxy(result, doc, c_node) + if element_class is not _Element: + result._init() + return result + + +@cython.internal +cdef class __ContentOnlyElement(_Element): + cdef int _raiseImmutable(self) except -1: + raise TypeError, "this element does not have children or attributes" + + def set(self, key, value): + "set(self, key, value)" + self._raiseImmutable() + + def append(self, value): + "append(self, value)" + self._raiseImmutable() + + def insert(self, index, value): + "insert(self, index, value)" + self._raiseImmutable() + + def __setitem__(self, index, value): + "__setitem__(self, index, value)" + self._raiseImmutable() + + @property + def attrib(self): + return IMMUTABLE_EMPTY_MAPPING + + property text: + def __get__(self): + _assertValidNode(self) + return funicodeOrEmpty(self._c_node.content) + + def __set__(self, value): + cdef tree.xmlDict* c_dict + _assertValidNode(self) + if value is None: + c_text = <const_xmlChar*>NULL + else: + value = _utf8(value) + c_text = _xcstr(value) + tree.xmlNodeSetContent(self._c_node, c_text) + + # ACCESSORS + def __getitem__(self, x): + "__getitem__(self, x)" + if isinstance(x, slice): + return [] + else: + raise IndexError, "list index out of range" + + def __len__(self): + "__len__(self)" + return 0 + + def get(self, key, default=None): + "get(self, key, default=None)" + return None + + def keys(self): + "keys(self)" + return [] + + def items(self): + "items(self)" + return [] + + def values(self): + "values(self)" + return [] + +cdef class _Comment(__ContentOnlyElement): + @property + def tag(self): + return Comment + + def __repr__(self): + return "<!--%s-->" % self.text + +cdef class _ProcessingInstruction(__ContentOnlyElement): + @property + def tag(self): + return ProcessingInstruction + + property target: + # not in ElementTree + def __get__(self): + _assertValidNode(self) + return funicode(self._c_node.name) + + def __set__(self, value): + _assertValidNode(self) + value = _utf8(value) + c_text = _xcstr(value) + tree.xmlNodeSetName(self._c_node, c_text) + + def __repr__(self): + text = self.text + if text: + return "<?%s %s?>" % (self.target, text) + else: + return "<?%s?>" % self.target + + def get(self, key, default=None): + """get(self, key, default=None) + + Try to parse pseudo-attributes from the text content of the + processing instruction, search for one with the given key as + name and return its associated value. + + Note that this is only a convenience method for the most + common case that all text content is structured in + attribute-like name-value pairs with properly quoted values. + It is not guaranteed to work for all possible text content. + """ + return self.attrib.get(key, default) + + @property + def attrib(self): + """Returns a dict containing all pseudo-attributes that can be + parsed from the text content of this processing instruction. + Note that modifying the dict currently has no effect on the + XML node, although this is not guaranteed to stay this way. + """ + return { attr : (value1 or value2) + for attr, value1, value2 in _FIND_PI_ATTRIBUTES(' ' + self.text) } + +cdef object _FIND_PI_ATTRIBUTES = re.compile(r'\s+(\w+)\s*=\s*(?:\'([^\']*)\'|"([^"]*)")', re.U).findall + +cdef class _Entity(__ContentOnlyElement): + @property + def tag(self): + return Entity + + property name: + # not in ElementTree + def __get__(self): + _assertValidNode(self) + return funicode(self._c_node.name) + + def __set__(self, value): + _assertValidNode(self) + value_utf = _utf8(value) + if b'&' in value_utf or b';' in value_utf: + raise ValueError, f"Invalid entity name '{value}'" + tree.xmlNodeSetName(self._c_node, _xcstr(value_utf)) + + @property + def text(self): + # FIXME: should this be None or '&[VALUE];' or the resolved + # entity value ? + _assertValidNode(self) + return f'&{funicode(self._c_node.name)};' + + def __repr__(self): + return "&%s;" % self.name + + +cdef class QName: + """QName(text_or_uri_or_element, tag=None) + + QName wrapper for qualified XML names. + + Pass a tag name by itself or a namespace URI and a tag name to + create a qualified name. Alternatively, pass an Element to + extract its tag name. ``None`` as first argument is ignored in + order to allow for generic 2-argument usage. + + The ``text`` property holds the qualified name in + ``{namespace}tagname`` notation. The ``namespace`` and + ``localname`` properties hold the respective parts of the tag + name. + + You can pass QName objects wherever a tag name is expected. Also, + setting Element text from a QName will resolve the namespace prefix + on assignment and set a qualified text value. This is helpful in XML + languages like SOAP or XML-Schema that use prefixed tag names in + their text content. + """ + cdef readonly unicode text + cdef readonly unicode localname + cdef readonly unicode namespace + def __init__(self, text_or_uri_or_element, tag=None): + if text_or_uri_or_element is None: + # Allow None as no namespace. + text_or_uri_or_element, tag = tag, None + if not _isString(text_or_uri_or_element): + if isinstance(text_or_uri_or_element, _Element): + text_or_uri_or_element = (<_Element>text_or_uri_or_element).tag + if not _isString(text_or_uri_or_element): + raise ValueError, f"Invalid input tag of type {type(text_or_uri_or_element)!r}" + elif isinstance(text_or_uri_or_element, QName): + text_or_uri_or_element = (<QName>text_or_uri_or_element).text + elif text_or_uri_or_element is not None: + text_or_uri_or_element = unicode(text_or_uri_or_element) + else: + raise ValueError, f"Invalid input tag of type {type(text_or_uri_or_element)!r}" + + ns_utf, tag_utf = _getNsTag(text_or_uri_or_element) + if tag is not None: + # either ('ns', 'tag') or ('{ns}oldtag', 'newtag') + if ns_utf is None: + ns_utf = tag_utf # case 1: namespace ended up as tag name + tag_utf = _utf8(tag) + _tagValidOrRaise(tag_utf) + self.localname = (<bytes>tag_utf).decode('utf8') + if ns_utf is None: + self.namespace = None + self.text = self.localname + else: + self.namespace = (<bytes>ns_utf).decode('utf8') + self.text = "{%s}%s" % (self.namespace, self.localname) + def __str__(self): + return self.text + def __hash__(self): + return hash(self.text) + def __richcmp__(self, other, int op): + try: + if type(other) is QName: + other = (<QName>other).text + elif not isinstance(other, unicode): + other = unicode(other) + except (ValueError, UnicodeDecodeError): + return NotImplemented + return python.PyObject_RichCompare(self.text, other, op) + + +cdef public class _ElementTree [ type LxmlElementTreeType, + object LxmlElementTree ]: + cdef _Document _doc + cdef _Element _context_node + + # Note that _doc is only used to store the original document if we do not + # have a _context_node. All methods should prefer self._context_node._doc + # to honour tree restructuring. _doc can happily be None! + + @cython.final + cdef int _assertHasRoot(self) except -1: + """We have to take care here: the document may not have a root node! + This can happen if ElementTree() is called without any argument and + the caller 'forgets' to call parse() afterwards, so this is a bug in + the caller program. + """ + assert self._context_node is not None, \ + "ElementTree not initialized, missing root" + return 0 + + def parse(self, source, _BaseParser parser=None, *, base_url=None): + """parse(self, source, parser=None, base_url=None) + + Updates self with the content of source and returns its root. + """ + cdef _Document doc = None + try: + doc = _parseDocument(source, parser, base_url) + except _TargetParserResult as result_container: + # raises a TypeError if we don't get an _Element + self._context_node = result_container.result + else: + self._context_node = doc.getroot() + self._doc = None if self._context_node is not None else doc + return self._context_node + + def _setroot(self, _Element root not None): + """_setroot(self, root) + + Relocate the ElementTree to a new root node. + """ + _assertValidNode(root) + if root._c_node.type != tree.XML_ELEMENT_NODE: + raise TypeError, "Only elements can be the root of an ElementTree" + self._context_node = root + self._doc = None + + def getroot(self): + """getroot(self) + + Gets the root element for this tree. + """ + return self._context_node + + def __copy__(self): + return _elementTreeFactory(self._doc, self._context_node) + + def __deepcopy__(self, memo): + cdef _Element root + cdef _Document doc + cdef xmlDoc* c_doc + if self._context_node is not None: + root = self._context_node.__copy__() + assert root is not None + _assertValidNode(root) + _copyNonElementSiblings(self._context_node._c_node, root._c_node) + return _elementTreeFactory(None, root) + elif self._doc is not None: + _assertValidDoc(self._doc) + c_doc = tree.xmlCopyDoc(self._doc._c_doc, 1) + if c_doc is NULL: + raise MemoryError() + doc = _documentFactory(c_doc, self._doc._parser) + return _elementTreeFactory(doc, None) + else: + # so what ... + return self + + # not in ElementTree + @property + def docinfo(self) -> DocInfo: + """Information about the document provided by parser and DTD.""" + self._assertHasRoot() + return DocInfo(self._context_node._doc) + + # not in ElementTree, read-only + @property + def parser(self): + """The parser that was used to parse the document in this ElementTree. + """ + if self._context_node is not None and \ + self._context_node._doc is not None: + return self._context_node._doc._parser + if self._doc is not None: + return self._doc._parser + return None + + def write(self, file, *, encoding=None, method="xml", + bint pretty_print=False, xml_declaration=None, bint with_tail=True, + standalone=None, doctype=None, compression=0, + bint exclusive=False, inclusive_ns_prefixes=None, + bint with_comments=True, bint strip_text=False, + docstring=None): + """write(self, file, encoding=None, method="xml", + pretty_print=False, xml_declaration=None, with_tail=True, + standalone=None, doctype=None, compression=0, + exclusive=False, inclusive_ns_prefixes=None, + with_comments=True, strip_text=False) + + Write the tree to a filename, file or file-like object. + + Defaults to ASCII encoding and writing a declaration as needed. + + The keyword argument 'method' selects the output method: + 'xml', 'html', 'text', 'c14n' or 'c14n2'. Default is 'xml'. + + With ``method="c14n"`` (C14N version 1), the options ``exclusive``, + ``with_comments`` and ``inclusive_ns_prefixes`` request exclusive + C14N, include comments, and list the inclusive prefixes respectively. + + With ``method="c14n2"`` (C14N version 2), the ``with_comments`` and + ``strip_text`` options control the output of comments and text space + according to C14N 2.0. + + Passing a boolean value to the ``standalone`` option will + output an XML declaration with the corresponding + ``standalone`` flag. + + The ``doctype`` option allows passing in a plain string that will + be serialised before the XML tree. Note that passing in non + well-formed content here will make the XML output non well-formed. + Also, an existing doctype in the document tree will not be removed + when serialising an ElementTree instance. + + The ``compression`` option enables GZip compression level 1-9. + + The ``inclusive_ns_prefixes`` should be a list of namespace strings + (i.e. ['xs', 'xsi']) that will be promoted to the top-level element + during exclusive C14N serialisation. This parameter is ignored if + exclusive mode=False. + + If exclusive=True and no list is provided, a namespace will only be + rendered if it is used by the immediate parent or one of its attributes + and its prefix and values have not already been rendered by an ancestor + of the namespace node's parent element. + """ + cdef bint write_declaration + cdef int is_standalone + + self._assertHasRoot() + _assertValidNode(self._context_node) + if compression is None or compression < 0: + compression = 0 + + # C14N serialisation + if method in ('c14n', 'c14n2'): + if encoding is not None: + raise ValueError("Cannot specify encoding with C14N") + if xml_declaration: + raise ValueError("Cannot enable XML declaration in C14N") + + if method == 'c14n': + _tofilelikeC14N(file, self._context_node, exclusive, with_comments, + compression, inclusive_ns_prefixes) + else: # c14n2 + with _open_utf8_file(file, compression=compression) as f: + target = C14NWriterTarget( + f.write, with_comments=with_comments, strip_text=strip_text) + _tree_to_target(self, target) + return + + if not with_comments: + raise ValueError("Can only discard comments in C14N serialisation") + # suppress decl. in default case (purely for ElementTree compatibility) + if xml_declaration is not None: + write_declaration = xml_declaration + if encoding is None: + encoding = 'ASCII' + else: + encoding = encoding.upper() + elif encoding is None: + encoding = 'ASCII' + write_declaration = 0 + else: + encoding = encoding.upper() + write_declaration = encoding not in ( + 'US-ASCII', 'ASCII', 'UTF8', 'UTF-8') + if standalone is None: + is_standalone = -1 + elif standalone: + write_declaration = 1 + is_standalone = 1 + else: + write_declaration = 1 + is_standalone = 0 + + if docstring is not None and doctype is None: + import warnings + warnings.warn( + "The 'docstring' option is deprecated. Use 'doctype' instead.", + DeprecationWarning) + doctype = docstring + + _tofilelike(file, self._context_node, encoding, doctype, method, + write_declaration, 1, pretty_print, with_tail, + is_standalone, compression) + + def getpath(self, _Element element not None): + """getpath(self, element) + + Returns a structural, absolute XPath expression to find the element. + + For namespaced elements, the expression uses prefixes from the + document, which therefore need to be provided in order to make any + use of the expression in XPath. + + Also see the method getelementpath(self, element), which returns a + self-contained ElementPath expression. + """ + cdef _Document doc + cdef _Element root + cdef xmlDoc* c_doc + _assertValidNode(element) + if self._context_node is not None: + root = self._context_node + doc = root._doc + elif self._doc is not None: + doc = self._doc + root = doc.getroot() + else: + raise ValueError, "Element is not in this tree." + _assertValidDoc(doc) + _assertValidNode(root) + if element._doc is not doc: + raise ValueError, "Element is not in this tree." + + c_doc = _fakeRootDoc(doc._c_doc, root._c_node) + c_path = tree.xmlGetNodePath(element._c_node) + _destroyFakeDoc(doc._c_doc, c_doc) + if c_path is NULL: + raise MemoryError() + path = funicode(c_path) + tree.xmlFree(c_path) + return path + + def getelementpath(self, _Element element not None): + """getelementpath(self, element) + + Returns a structural, absolute ElementPath expression to find the + element. This path can be used in the .find() method to look up + the element, provided that the elements along the path and their + list of immediate children were not modified in between. + + ElementPath has the advantage over an XPath expression (as returned + by the .getpath() method) that it does not require additional prefix + declarations. It is always self-contained. + """ + cdef _Element root + cdef Py_ssize_t count + _assertValidNode(element) + if element._c_node.type != tree.XML_ELEMENT_NODE: + raise ValueError, "input is not an Element" + if self._context_node is not None: + root = self._context_node + elif self._doc is not None: + root = self._doc.getroot() + else: + raise ValueError, "Element is not in this tree" + _assertValidNode(root) + if element._doc is not root._doc: + raise ValueError, "Element is not in this tree" + + path = [] + c_element = element._c_node + while c_element is not root._c_node: + c_name = c_element.name + c_href = _getNs(c_element) + tag = _namespacedNameFromNsName(c_href, c_name) + if c_href is NULL: + c_href = <const_xmlChar*>b'' # no namespace (NULL is wildcard) + # use tag[N] if there are preceding siblings with the same tag + count = 0 + c_node = c_element.prev + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE: + if _tagMatches(c_node, c_href, c_name): + count += 1 + c_node = c_node.prev + if count: + tag = f'{tag}[{count+1}]' + else: + # use tag[1] if there are following siblings with the same tag + c_node = c_element.next + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE: + if _tagMatches(c_node, c_href, c_name): + tag += '[1]' + break + c_node = c_node.next + + path.append(tag) + c_element = c_element.parent + if c_element is NULL or c_element.type != tree.XML_ELEMENT_NODE: + raise ValueError, "Element is not in this tree." + if not path: + return '.' + path.reverse() + return '/'.join(path) + + def getiterator(self, tag=None, *tags): + """getiterator(self, *tags, tag=None) + + Returns a sequence or iterator of all elements in document order + (depth first pre-order), starting with the root element. + + Can be restricted to find only elements with specific tags, + see `_Element.iter`. + + :deprecated: Note that this method is deprecated as of + ElementTree 1.3 and lxml 2.0. It returns an iterator in + lxml, which diverges from the original ElementTree + behaviour. If you want an efficient iterator, use the + ``tree.iter()`` method instead. You should only use this + method in new code if you require backwards compatibility + with older versions of lxml or ElementTree. + """ + root = self.getroot() + if root is None: + return ITER_EMPTY + if tag is not None: + tags += (tag,) + return root.getiterator(*tags) + + def iter(self, tag=None, *tags): + """iter(self, tag=None, *tags) + + Creates an iterator for the root element. The iterator loops over + all elements in this tree, in document order. Note that siblings + of the root element (comments or processing instructions) are not + returned by the iterator. + + Can be restricted to find only elements with specific tags, + see `_Element.iter`. + """ + root = self.getroot() + if root is None: + return ITER_EMPTY + if tag is not None: + tags += (tag,) + return root.iter(*tags) + + def find(self, path, namespaces=None): + """find(self, path, namespaces=None) + + Finds the first toplevel element with given tag. Same as + ``tree.getroot().find(path)``. + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + self._assertHasRoot() + root = self.getroot() + if _isString(path): + if path[:1] == "/": + path = "." + path + from warnings import warn + warn( + "This search incorrectly ignores the root element, and will be " + "fixed in a future version. If you rely on the current " + f"behaviour, change it to {path!r}", + FutureWarning, stacklevel=1 + ) + return root.find(path, namespaces) + + def findtext(self, path, default=None, namespaces=None): + """findtext(self, path, default=None, namespaces=None) + + Finds the text for the first element matching the ElementPath + expression. Same as getroot().findtext(path) + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + self._assertHasRoot() + root = self.getroot() + if _isString(path): + if path[:1] == "/": + path = "." + path + from warnings import warn + warn( + "This search incorrectly ignores the root element, and will be " + "fixed in a future version. If you rely on the current " + f"behaviour, change it to {path!r}", + FutureWarning, stacklevel=1 + ) + return root.findtext(path, default, namespaces) + + def findall(self, path, namespaces=None): + """findall(self, path, namespaces=None) + + Finds all elements matching the ElementPath expression. Same as + getroot().findall(path). + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + self._assertHasRoot() + root = self.getroot() + if _isString(path): + if path[:1] == "/": + path = "." + path + from warnings import warn + warn( + "This search incorrectly ignores the root element, and will be " + "fixed in a future version. If you rely on the current " + f"behaviour, change it to {path!r}", + FutureWarning, stacklevel=1 + ) + return root.findall(path, namespaces) + + def iterfind(self, path, namespaces=None): + """iterfind(self, path, namespaces=None) + + Iterates over all elements matching the ElementPath expression. + Same as getroot().iterfind(path). + + The optional ``namespaces`` argument accepts a + prefix-to-namespace mapping that allows the usage of XPath + prefixes in the path expression. + """ + self._assertHasRoot() + root = self.getroot() + if _isString(path): + if path[:1] == "/": + path = "." + path + from warnings import warn + warn( + "This search incorrectly ignores the root element, and will be " + "fixed in a future version. If you rely on the current " + f"behaviour, change it to {path!r}", + FutureWarning, stacklevel=1 + ) + return root.iterfind(path, namespaces) + + def xpath(self, _path, *, namespaces=None, extensions=None, + smart_strings=True, **_variables): + """xpath(self, _path, namespaces=None, extensions=None, smart_strings=True, **_variables) + + XPath evaluate in context of document. + + ``namespaces`` is an optional dictionary with prefix to namespace URI + mappings, used by XPath. ``extensions`` defines additional extension + functions. + + Returns a list (nodeset), or bool, float or string. + + In case of a list result, return Element for element nodes, + string for text and attribute values. + + Note: if you are going to apply multiple XPath expressions + against the same document, it is more efficient to use + XPathEvaluator directly. + """ + self._assertHasRoot() + evaluator = XPathDocumentEvaluator(self, namespaces=namespaces, + extensions=extensions, + smart_strings=smart_strings) + return evaluator(_path, **_variables) + + def xslt(self, _xslt, extensions=None, access_control=None, **_kw): + """xslt(self, _xslt, extensions=None, access_control=None, **_kw) + + Transform this document using other document. + + xslt is a tree that should be XSLT + keyword parameters are XSLT transformation parameters. + + Returns the transformed tree. + + Note: if you are going to apply the same XSLT stylesheet against + multiple documents, it is more efficient to use the XSLT + class directly. + """ + self._assertHasRoot() + style = XSLT(_xslt, extensions=extensions, + access_control=access_control) + return style(self, **_kw) + + def relaxng(self, relaxng): + """relaxng(self, relaxng) + + Validate this document using other document. + + The relaxng argument is a tree that should contain a Relax NG schema. + + Returns True or False, depending on whether validation + succeeded. + + Note: if you are going to apply the same Relax NG schema against + multiple documents, it is more efficient to use the RelaxNG + class directly. + """ + self._assertHasRoot() + schema = RelaxNG(relaxng) + return schema.validate(self) + + def xmlschema(self, xmlschema): + """xmlschema(self, xmlschema) + + Validate this document using other document. + + The xmlschema argument is a tree that should contain an XML Schema. + + Returns True or False, depending on whether validation + succeeded. + + Note: If you are going to apply the same XML Schema against + multiple documents, it is more efficient to use the XMLSchema + class directly. + """ + self._assertHasRoot() + schema = XMLSchema(xmlschema) + return schema.validate(self) + + def xinclude(self): + """xinclude(self) + + Process the XInclude nodes in this document and include the + referenced XML fragments. + + There is support for loading files through the file system, HTTP and + FTP. + + Note that XInclude does not support custom resolvers in Python space + due to restrictions of libxml2 <= 2.6.29. + """ + self._assertHasRoot() + XInclude()(self._context_node) + + def write_c14n(self, file, *, bint exclusive=False, bint with_comments=True, + compression=0, inclusive_ns_prefixes=None): + """write_c14n(self, file, exclusive=False, with_comments=True, + compression=0, inclusive_ns_prefixes=None) + + C14N write of document. Always writes UTF-8. + + The ``compression`` option enables GZip compression level 1-9. + + The ``inclusive_ns_prefixes`` should be a list of namespace strings + (i.e. ['xs', 'xsi']) that will be promoted to the top-level element + during exclusive C14N serialisation. This parameter is ignored if + exclusive mode=False. + + If exclusive=True and no list is provided, a namespace will only be + rendered if it is used by the immediate parent or one of its attributes + and its prefix and values have not already been rendered by an ancestor + of the namespace node's parent element. + + NOTE: This method is deprecated as of lxml 4.4 and will be removed in a + future release. Use ``.write(f, method="c14n")`` instead. + """ + self._assertHasRoot() + _assertValidNode(self._context_node) + if compression is None or compression < 0: + compression = 0 + + _tofilelikeC14N(file, self._context_node, exclusive, with_comments, + compression, inclusive_ns_prefixes) + +cdef _ElementTree _elementTreeFactory(_Document doc, _Element context_node): + return _newElementTree(doc, context_node, _ElementTree) + +cdef _ElementTree _newElementTree(_Document doc, _Element context_node, + object baseclass): + cdef _ElementTree result + result = baseclass() + if context_node is None and doc is not None: + context_node = doc.getroot() + if context_node is None: + _assertValidDoc(doc) + result._doc = doc + else: + _assertValidNode(context_node) + result._context_node = context_node + return result + + +@cython.final +@cython.freelist(16) +cdef class _Attrib: + """A dict-like proxy for the ``Element.attrib`` property. + """ + cdef _Element _element + def __cinit__(self, _Element element not None): + _assertValidNode(element) + self._element = element + + # MANIPULATORS + def __setitem__(self, key, value): + _assertValidNode(self._element) + _setAttributeValue(self._element, key, value) + + def __delitem__(self, key): + _assertValidNode(self._element) + _delAttribute(self._element, key) + + def update(self, sequence_or_dict): + _assertValidNode(self._element) + if isinstance(sequence_or_dict, (dict, _Attrib)): + sequence_or_dict = sequence_or_dict.items() + for key, value in sequence_or_dict: + _setAttributeValue(self._element, key, value) + + def pop(self, key, *default): + if len(default) > 1: + raise TypeError, f"pop expected at most 2 arguments, got {len(default)+1}" + _assertValidNode(self._element) + result = _getAttributeValue(self._element, key, None) + if result is None: + if not default: + raise KeyError, key + result = default[0] + else: + _delAttribute(self._element, key) + return result + + def clear(self): + _assertValidNode(self._element) + c_attrs = self._element._c_node.properties + if c_attrs: + self._element._c_node.properties = NULL + tree.xmlFreePropList(c_attrs) + + # ACCESSORS + def __repr__(self): + _assertValidNode(self._element) + return repr(dict( _collectAttributes(self._element._c_node, 3) )) + + def __copy__(self): + _assertValidNode(self._element) + return dict(_collectAttributes(self._element._c_node, 3)) + + def __deepcopy__(self, memo): + _assertValidNode(self._element) + return dict(_collectAttributes(self._element._c_node, 3)) + + def __getitem__(self, key): + _assertValidNode(self._element) + result = _getAttributeValue(self._element, key, None) + if result is None: + raise KeyError, key + return result + + def __bool__(self): + _assertValidNode(self._element) + cdef xmlAttr* c_attr = self._element._c_node.properties + while c_attr is not NULL: + if c_attr.type == tree.XML_ATTRIBUTE_NODE: + return 1 + c_attr = c_attr.next + return 0 + + def __len__(self): + _assertValidNode(self._element) + cdef xmlAttr* c_attr = self._element._c_node.properties + cdef Py_ssize_t c = 0 + while c_attr is not NULL: + if c_attr.type == tree.XML_ATTRIBUTE_NODE: + c += 1 + c_attr = c_attr.next + return c + + def get(self, key, default=None): + _assertValidNode(self._element) + return _getAttributeValue(self._element, key, default) + + def keys(self): + _assertValidNode(self._element) + return _collectAttributes(self._element._c_node, 1) + + def __iter__(self): + _assertValidNode(self._element) + return iter(_collectAttributes(self._element._c_node, 1)) + + def iterkeys(self): + _assertValidNode(self._element) + return iter(_collectAttributes(self._element._c_node, 1)) + + def values(self): + _assertValidNode(self._element) + return _collectAttributes(self._element._c_node, 2) + + def itervalues(self): + _assertValidNode(self._element) + return iter(_collectAttributes(self._element._c_node, 2)) + + def items(self): + _assertValidNode(self._element) + return _collectAttributes(self._element._c_node, 3) + + def iteritems(self): + _assertValidNode(self._element) + return iter(_collectAttributes(self._element._c_node, 3)) + + def has_key(self, key): + _assertValidNode(self._element) + return key in self + + def __contains__(self, key): + _assertValidNode(self._element) + cdef xmlNode* c_node + ns, tag = _getNsTag(key) + c_node = self._element._c_node + c_href = <const_xmlChar*>NULL if ns is None else _xcstr(ns) + return 1 if tree.xmlHasNsProp(c_node, _xcstr(tag), c_href) else 0 + + def __richcmp__(self, other, int op): + try: + one = dict(self.items()) + if not isinstance(other, dict): + other = dict(other) + except (TypeError, ValueError): + return NotImplemented + return python.PyObject_RichCompare(one, other, op) + +MutableMapping.register(_Attrib) + + +@cython.final +@cython.internal +cdef class _AttribIterator: + """Attribute iterator - for internal use only! + """ + # XML attributes must not be removed while running! + cdef _Element _node + cdef xmlAttr* _c_attr + cdef int _keysvalues # 1 - keys, 2 - values, 3 - items (key, value) + def __iter__(self): + return self + + def __next__(self): + cdef xmlAttr* c_attr + if self._node is None: + raise StopIteration + c_attr = self._c_attr + while c_attr is not NULL and c_attr.type != tree.XML_ATTRIBUTE_NODE: + c_attr = c_attr.next + if c_attr is NULL: + self._node = None + raise StopIteration + + self._c_attr = c_attr.next + if self._keysvalues == 1: + return _namespacedName(<xmlNode*>c_attr) + elif self._keysvalues == 2: + return _attributeValue(self._node._c_node, c_attr) + else: + return (_namespacedName(<xmlNode*>c_attr), + _attributeValue(self._node._c_node, c_attr)) + +cdef object _attributeIteratorFactory(_Element element, int keysvalues): + cdef _AttribIterator attribs + if element._c_node.properties is NULL: + return ITER_EMPTY + attribs = _AttribIterator() + attribs._node = element + attribs._c_attr = element._c_node.properties + attribs._keysvalues = keysvalues + return attribs + + +cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, + type LxmlElementTagMatcherType ]: + """ + Dead but public. :) + """ + cdef object _pystrings + cdef int _node_type + cdef char* _href + cdef char* _name + cdef _initTagMatch(self, tag): + self._href = NULL + self._name = NULL + if tag is None: + self._node_type = 0 + elif tag is Comment: + self._node_type = tree.XML_COMMENT_NODE + elif tag is ProcessingInstruction: + self._node_type = tree.XML_PI_NODE + elif tag is Entity: + self._node_type = tree.XML_ENTITY_REF_NODE + elif tag is Element: + self._node_type = tree.XML_ELEMENT_NODE + else: + self._node_type = tree.XML_ELEMENT_NODE + self._pystrings = _getNsTag(tag) + if self._pystrings[0] is not None: + self._href = _cstr(self._pystrings[0]) + self._name = _cstr(self._pystrings[1]) + if self._name[0] == c'*' and self._name[1] == c'\0': + self._name = NULL + +cdef public class _ElementIterator(_ElementTagMatcher) [ + object LxmlElementIterator, type LxmlElementIteratorType ]: + """ + Dead but public. :) + """ + # we keep Python references here to control GC + cdef _Element _node + cdef _node_to_node_function _next_element + def __iter__(self): + return self + + cdef void _storeNext(self, _Element node): + cdef xmlNode* c_node + c_node = self._next_element(node._c_node) + while c_node is not NULL and \ + self._node_type != 0 and \ + (<tree.xmlElementType>self._node_type != c_node.type or + not _tagMatches(c_node, <const_xmlChar*>self._href, <const_xmlChar*>self._name)): + c_node = self._next_element(c_node) + if c_node is NULL: + self._node = None + else: + # Python ref: + self._node = _elementFactory(node._doc, c_node) + + def __next__(self): + cdef xmlNode* c_node + cdef _Element current_node + if self._node is None: + raise StopIteration + # Python ref: + current_node = self._node + self._storeNext(current_node) + return current_node + +@cython.final +@cython.internal +cdef class _MultiTagMatcher: + """ + Match an xmlNode against a list of tags. + """ + cdef list _py_tags + cdef qname* _cached_tags + cdef size_t _tag_count + cdef size_t _cached_size + cdef _Document _cached_doc + cdef int _node_types + + def __cinit__(self, tags): + self._py_tags = [] + self.initTagMatch(tags) + + def __dealloc__(self): + self._clear() + + cdef bint rejectsAll(self) noexcept: + return not self._tag_count and not self._node_types + + cdef bint rejectsAllAttributes(self) noexcept: + return not self._tag_count + + cdef bint matchesType(self, int node_type) noexcept: + if node_type == tree.XML_ELEMENT_NODE and self._tag_count: + return True + return self._node_types & (1 << node_type) + + cdef void _clear(self) noexcept: + cdef size_t i, count + count = self._tag_count + self._tag_count = 0 + if self._cached_tags: + for i in range(count): + cpython.ref.Py_XDECREF(self._cached_tags[i].href) + python.lxml_free(self._cached_tags) + self._cached_tags = NULL + + cdef initTagMatch(self, tags): + self._cached_doc = None + del self._py_tags[:] + self._clear() + if tags is None or tags == (): + # no selection in tags argument => match anything + self._node_types = ( + 1 << tree.XML_COMMENT_NODE | + 1 << tree.XML_PI_NODE | + 1 << tree.XML_ENTITY_REF_NODE | + 1 << tree.XML_ELEMENT_NODE) + else: + self._node_types = 0 + self._storeTags(tags, set()) + + cdef _storeTags(self, tag, set seen): + if tag is Comment: + self._node_types |= 1 << tree.XML_COMMENT_NODE + elif tag is ProcessingInstruction: + self._node_types |= 1 << tree.XML_PI_NODE + elif tag is Entity: + self._node_types |= 1 << tree.XML_ENTITY_REF_NODE + elif tag is Element: + self._node_types |= 1 << tree.XML_ELEMENT_NODE + elif python._isString(tag): + if tag in seen: + return + seen.add(tag) + if tag in ('*', '{*}*'): + self._node_types |= 1 << tree.XML_ELEMENT_NODE + else: + href, name = _getNsTag(tag) + if name == b'*': + name = None + if href is None: + href = b'' # no namespace + elif href == b'*': + href = None # wildcard: any namespace, including none + self._py_tags.append((href, name)) + elif isinstance(tag, QName): + self._storeTags(tag.text, seen) + else: + # support a sequence of tags + for item in tag: + self._storeTags(item, seen) + + cdef inline int cacheTags(self, _Document doc, bint force_into_dict=False) except -1: + """ + Look up the tag names in the doc dict to enable string pointer comparisons. + """ + cdef size_t dict_size = tree.xmlDictSize(doc._c_doc.dict) + if doc is self._cached_doc and dict_size == self._cached_size: + # doc and dict didn't change => names already cached + return 0 + self._tag_count = 0 + if not self._py_tags: + self._cached_doc = doc + self._cached_size = dict_size + return 0 + if not self._cached_tags: + self._cached_tags = <qname*>python.lxml_malloc(len(self._py_tags), sizeof(qname)) + if not self._cached_tags: + self._cached_doc = None + raise MemoryError() + self._tag_count = <size_t>_mapTagsToQnameMatchArray( + doc._c_doc, self._py_tags, self._cached_tags, force_into_dict) + self._cached_doc = doc + self._cached_size = dict_size + return 0 + + cdef inline bint matches(self, xmlNode* c_node) noexcept: + cdef qname* c_qname + if self._node_types & (1 << c_node.type): + return True + elif c_node.type == tree.XML_ELEMENT_NODE: + for c_qname in self._cached_tags[:self._tag_count]: + if _tagMatchesExactly(c_node, c_qname): + return True + return False + + cdef inline bint matchesNsTag(self, const_xmlChar* c_href, + const_xmlChar* c_name) noexcept: + cdef qname* c_qname + if self._node_types & (1 << tree.XML_ELEMENT_NODE): + return True + for c_qname in self._cached_tags[:self._tag_count]: + if _nsTagMatchesExactly(c_href, c_name, c_qname): + return True + return False + + cdef inline bint matchesAttribute(self, xmlAttr* c_attr) noexcept: + """Attribute matches differ from Element matches in that they do + not care about node types. + """ + cdef qname* c_qname + for c_qname in self._cached_tags[:self._tag_count]: + if _tagMatchesExactly(<xmlNode*>c_attr, c_qname): + return True + return False + +cdef class _ElementMatchIterator: + cdef _Element _node + cdef _node_to_node_function _next_element + cdef _MultiTagMatcher _matcher + + @cython.final + cdef _initTagMatcher(self, tags): + self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tags) + + def __iter__(self): + return self + + @cython.final + cdef int _storeNext(self, _Element node) except -1: + self._matcher.cacheTags(node._doc) + c_node = self._next_element(node._c_node) + while c_node is not NULL and not self._matcher.matches(c_node): + c_node = self._next_element(c_node) + # store Python ref to next node to make sure it's kept alive + self._node = _elementFactory(node._doc, c_node) if c_node is not NULL else None + return 0 + + def __next__(self): + cdef _Element current_node = self._node + if current_node is None: + raise StopIteration + self._storeNext(current_node) + return current_node + +cdef class ElementChildIterator(_ElementMatchIterator): + """ElementChildIterator(self, node, tag=None, reversed=False) + Iterates over the children of an element. + """ + def __cinit__(self, _Element node not None, tag=None, *, bint reversed=False): + cdef xmlNode* c_node + _assertValidNode(node) + self._initTagMatcher(tag) + if reversed: + c_node = _findChildBackwards(node._c_node, 0) + self._next_element = _previousElement + else: + c_node = _findChildForwards(node._c_node, 0) + self._next_element = _nextElement + self._matcher.cacheTags(node._doc) + while c_node is not NULL and not self._matcher.matches(c_node): + c_node = self._next_element(c_node) + # store Python ref to next node to make sure it's kept alive + self._node = _elementFactory(node._doc, c_node) if c_node is not NULL else None + +cdef class SiblingsIterator(_ElementMatchIterator): + """SiblingsIterator(self, node, tag=None, preceding=False) + Iterates over the siblings of an element. + + You can pass the boolean keyword ``preceding`` to specify the direction. + """ + def __cinit__(self, _Element node not None, tag=None, *, bint preceding=False): + _assertValidNode(node) + self._initTagMatcher(tag) + if preceding: + self._next_element = _previousElement + else: + self._next_element = _nextElement + self._storeNext(node) + +cdef class AncestorsIterator(_ElementMatchIterator): + """AncestorsIterator(self, node, tag=None) + Iterates over the ancestors of an element (from parent to parent). + """ + def __cinit__(self, _Element node not None, tag=None): + _assertValidNode(node) + self._initTagMatcher(tag) + self._next_element = _parentElement + self._storeNext(node) + +cdef class ElementDepthFirstIterator: + """ElementDepthFirstIterator(self, node, tag=None, inclusive=True) + Iterates over an element and its sub-elements in document order (depth + first pre-order). + + Note that this also includes comments, entities and processing + instructions. To filter them out, check if the ``tag`` property + of the returned element is a string (i.e. not None and not a + factory function), or pass the ``Element`` factory for the ``tag`` + argument to receive only Elements. + + If the optional ``tag`` argument is not None, the iterator returns only + the elements that match the respective name and namespace. + + The optional boolean argument 'inclusive' defaults to True and can be set + to False to exclude the start element itself. + + Note that the behaviour of this iterator is completely undefined if the + tree it traverses is modified during iteration. + """ + # we keep Python references here to control GC + # keep the next Element after the one we return, and the (s)top node + cdef _Element _next_node + cdef _Element _top_node + cdef _MultiTagMatcher _matcher + def __cinit__(self, _Element node not None, tag=None, *, bint inclusive=True): + _assertValidNode(node) + self._top_node = node + self._next_node = node + self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag) + self._matcher.cacheTags(node._doc) + if not inclusive or not self._matcher.matches(node._c_node): + # find start node (this cannot raise StopIteration, self._next_node != None) + next(self) + + def __iter__(self): + return self + + def __next__(self): + cdef xmlNode* c_node + cdef _Element current_node = self._next_node + if current_node is None: + raise StopIteration + c_node = current_node._c_node + self._matcher.cacheTags(current_node._doc) + if not self._matcher._tag_count: + # no tag name was found in the dict => not in document either + # try to match by node type + c_node = self._nextNodeAnyTag(c_node) + else: + c_node = self._nextNodeMatchTag(c_node) + if c_node is NULL: + self._next_node = None + else: + self._next_node = _elementFactory(current_node._doc, c_node) + return current_node + + @cython.final + cdef xmlNode* _nextNodeAnyTag(self, xmlNode* c_node) noexcept: + cdef int node_types = self._matcher._node_types + if not node_types: + return NULL + tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0) + if node_types & (1 << c_node.type): + return c_node + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + return NULL + + @cython.final + cdef xmlNode* _nextNodeMatchTag(self, xmlNode* c_node) noexcept: + tree.BEGIN_FOR_EACH_ELEMENT_FROM(self._top_node._c_node, c_node, 0) + if self._matcher.matches(c_node): + return c_node + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + return NULL + + +cdef class ElementTextIterator: + """ElementTextIterator(self, element, tag=None, with_tail=True) + Iterates over the text content of a subtree. + + You can pass the ``tag`` keyword argument to restrict text content to a + specific tag name. + + You can set the ``with_tail`` keyword argument to ``False`` to skip over + tail text (e.g. if you know that it's only whitespace from pretty-printing). + """ + cdef object _events + cdef _Element _start_element + def __cinit__(self, _Element element not None, tag=None, *, bint with_tail=True): + _assertValidNode(element) + if with_tail: + events = ("start", "comment", "pi", "end") + else: + events = ("start",) + self._start_element = element + self._events = iterwalk(element, events=events, tag=tag) + + def __iter__(self): + return self + + def __next__(self): + cdef _Element element + result = None + while result is None: + event, element = next(self._events) # raises StopIteration + if event == "start": + result = element.text + elif element is not self._start_element: + result = element.tail + return result + + +cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf) except NULL: + cdef xmlNode* c_node + c_node = tree.xmlNewDocNode(c_doc, NULL, _xcstr(name_utf), NULL) + return c_node + +cdef xmlNode* _createComment(xmlDoc* c_doc, const_xmlChar* text) noexcept: + cdef xmlNode* c_node + c_node = tree.xmlNewDocComment(c_doc, text) + return c_node + +cdef xmlNode* _createPI(xmlDoc* c_doc, const_xmlChar* target, const_xmlChar* text) noexcept: + cdef xmlNode* c_node + c_node = tree.xmlNewDocPI(c_doc, target, text) + return c_node + +cdef xmlNode* _createEntity(xmlDoc* c_doc, const_xmlChar* name) noexcept: + cdef xmlNode* c_node + c_node = tree.xmlNewReference(c_doc, name) + return c_node + +# module-level API for ElementTree + +def Element(_tag, attrib=None, nsmap=None, **_extra): + """Element(_tag, attrib=None, nsmap=None, **_extra) + + Element factory. This function returns an object implementing the + Element interface. + + Also look at the `_Element.makeelement()` and + `_BaseParser.makeelement()` methods, which provide a faster way to + create an Element within a specific document or parser context. + """ + return _makeElement(_tag, NULL, None, None, None, None, + attrib, nsmap, _extra) + + +def Comment(text=None): + """Comment(text=None) + + Comment element factory. This factory function creates a special element that will + be serialized as an XML comment. + """ + cdef _Document doc + cdef xmlNode* c_node + cdef xmlDoc* c_doc + + if text is None: + text = b'' + else: + text = _utf8(text) + if b'--' in text or text.endswith(b'-'): + raise ValueError("Comment may not contain '--' or end with '-'") + + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + c_node = _createComment(c_doc, _xcstr(text)) + tree.xmlAddChild(<xmlNode*>c_doc, c_node) + return _elementFactory(doc, c_node) + + +def ProcessingInstruction(target, text=None): + """ProcessingInstruction(target, text=None) + + ProcessingInstruction element factory. This factory function creates a + special element that will be serialized as an XML processing instruction. + """ + cdef _Document doc + cdef xmlNode* c_node + cdef xmlDoc* c_doc + + target = _utf8(target) + _tagValidOrRaise(target) + if target.lower() == b'xml': + raise ValueError, f"Invalid PI name '{target}'" + + if text is None: + text = b'' + else: + text = _utf8(text) + if b'?>' in text: + raise ValueError, "PI text must not contain '?>'" + + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + c_node = _createPI(c_doc, _xcstr(target), _xcstr(text)) + tree.xmlAddChild(<xmlNode*>c_doc, c_node) + return _elementFactory(doc, c_node) + +PI = ProcessingInstruction + + +cdef class CDATA: + """CDATA(data) + + CDATA factory. This factory creates an opaque data object that + can be used to set Element text. The usual way to use it is:: + + >>> el = Element('content') + >>> el.text = CDATA('a string') + + >>> print(el.text) + a string + >>> print(tostring(el, encoding="unicode")) + <content><![CDATA[a string]]></content> + """ + cdef bytes _utf8_data + def __cinit__(self, data): + self._utf8_data = _utf8(data) + + +def Entity(name): + """Entity(name) + + Entity factory. This factory function creates a special element + that will be serialized as an XML entity reference or character + reference. Note, however, that entities will not be automatically + declared in the document. A document that uses entity references + requires a DTD to define the entities. + """ + cdef _Document doc + cdef xmlNode* c_node + cdef xmlDoc* c_doc + name_utf = _utf8(name) + c_name = _xcstr(name_utf) + if c_name[0] == c'#': + if not _characterReferenceIsValid(c_name + 1): + raise ValueError, f"Invalid character reference: '{name}'" + elif not _xmlNameIsValid(c_name): + raise ValueError, f"Invalid entity reference: '{name}'" + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + c_node = _createEntity(c_doc, c_name) + tree.xmlAddChild(<xmlNode*>c_doc, c_node) + return _elementFactory(doc, c_node) + + +def SubElement(_Element _parent not None, _tag, + attrib=None, nsmap=None, **_extra): + """SubElement(_parent, _tag, attrib=None, nsmap=None, **_extra) + + Subelement factory. This function creates an element instance, and + appends it to an existing element. + """ + return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra) + + +def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None): + """ElementTree(element=None, file=None, parser=None) + + ElementTree wrapper class. + """ + cdef xmlNode* c_next + cdef xmlNode* c_node + cdef xmlNode* c_node_copy + cdef xmlDoc* c_doc + cdef _ElementTree etree + cdef _Document doc + + if element is not None: + doc = element._doc + elif file is not None: + try: + doc = _parseDocument(file, parser, None) + except _TargetParserResult as result_container: + return result_container.result + else: + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, parser) + + return _elementTreeFactory(doc, element) + + +def HTML(text, _BaseParser parser=None, *, base_url=None): + """HTML(text, parser=None, base_url=None) + + Parses an HTML document from a string constant. Returns the root + node (or the result returned by a parser target). This function + can be used to embed "HTML literals" in Python code. + + To override the parser with a different ``HTMLParser`` you can pass it to + the ``parser`` keyword argument. + + The ``base_url`` keyword argument allows to set the original base URL of + the document to support relative Paths when looking up external entities + (DTD, XInclude, ...). + """ + cdef _Document doc + if parser is None: + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + if not isinstance(parser, HTMLParser): + parser = __DEFAULT_HTML_PARSER + try: + doc = _parseMemoryDocument(text, base_url, parser) + return doc.getroot() + except _TargetParserResult as result_container: + return result_container.result + + +def XML(text, _BaseParser parser=None, *, base_url=None): + """XML(text, parser=None, base_url=None) + + Parses an XML document or fragment from a string constant. + Returns the root node (or the result returned by a parser target). + This function can be used to embed "XML literals" in Python code, + like in + + >>> root = XML("<root><test/></root>") + >>> print(root.tag) + root + + To override the parser with a different ``XMLParser`` you can pass it to + the ``parser`` keyword argument. + + The ``base_url`` keyword argument allows to set the original base URL of + the document to support relative Paths when looking up external entities + (DTD, XInclude, ...). + """ + cdef _Document doc + if parser is None: + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + if not isinstance(parser, XMLParser): + parser = __DEFAULT_XML_PARSER + try: + doc = _parseMemoryDocument(text, base_url, parser) + return doc.getroot() + except _TargetParserResult as result_container: + return result_container.result + + +def fromstring(text, _BaseParser parser=None, *, base_url=None): + """fromstring(text, parser=None, base_url=None) + + Parses an XML document or fragment from a string. Returns the + root node (or the result returned by a parser target). + + To override the default parser with a different parser you can pass it to + the ``parser`` keyword argument. + + The ``base_url`` keyword argument allows to set the original base URL of + the document to support relative Paths when looking up external entities + (DTD, XInclude, ...). + """ + cdef _Document doc + try: + doc = _parseMemoryDocument(text, base_url, parser) + return doc.getroot() + except _TargetParserResult as result_container: + return result_container.result + + +def fromstringlist(strings, _BaseParser parser=None): + """fromstringlist(strings, parser=None) + + Parses an XML document from a sequence of strings. Returns the + root node (or the result returned by a parser target). + + To override the default parser with a different parser you can pass it to + the ``parser`` keyword argument. + """ + cdef _Document doc + if isinstance(strings, (bytes, unicode)): + raise ValueError("passing a single string into fromstringlist() is not" + " efficient, use fromstring() instead") + if parser is None: + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + feed = parser.feed + for data in strings: + feed(data) + return parser.close() + + +def iselement(element): + """iselement(element) + + Checks if an object appears to be a valid element object. + """ + return isinstance(element, _Element) and (<_Element>element)._c_node is not NULL + + +def indent(tree, space=" ", *, Py_ssize_t level=0): + """indent(tree, space=" ", level=0) + + Indent an XML document by inserting newlines and indentation space + after elements. + + *tree* is the ElementTree or Element to modify. The (root) element + itself will not be changed, but the tail text of all elements in its + subtree will be adapted. + + *space* is the whitespace to insert for each indentation level, two + space characters by default. + + *level* is the initial indentation level. Setting this to a higher + value than 0 can be used for indenting subtrees that are more deeply + nested inside of a document. + """ + root = _rootNodeOrRaise(tree) + if level < 0: + raise ValueError(f"Initial indentation level must be >= 0, got {level}") + if _hasChild(root._c_node): + space = _utf8(space) + indent = b"\n" + level * space + _indent_children(root._c_node, 1, space, [indent, indent + space]) + + +cdef int _indent_children(xmlNode* c_node, Py_ssize_t level, bytes one_space, list indentations) except -1: + # Reuse indentation strings for speed. + if len(indentations) <= level: + indentations.append(indentations[-1] + one_space) + + # Start a new indentation level for the first child. + child_indentation = indentations[level] + if not _hasNonWhitespaceText(c_node): + _setNodeText(c_node, child_indentation) + + # Recursively indent all children. + cdef xmlNode* c_child = _findChildForwards(c_node, 0) + while c_child is not NULL: + if _hasChild(c_child): + _indent_children(c_child, level+1, one_space, indentations) + c_next_child = _nextElement(c_child) + if not _hasNonWhitespaceTail(c_child): + if c_next_child is NULL: + # Dedent after the last child. + child_indentation = indentations[level-1] + _setTailText(c_child, child_indentation) + c_child = c_next_child + return 0 + + +def dump(_Element elem not None, *, bint pretty_print=True, bint with_tail=True): + """dump(elem, pretty_print=True, with_tail=True) + + Writes an element tree or element structure to sys.stdout. This function + should be used for debugging only. + """ + xml = tostring(elem, pretty_print=pretty_print, with_tail=with_tail, encoding='unicode') + if not pretty_print: + xml += '\n' + sys.stdout.write(xml) + + +def tostring(element_or_tree, *, encoding=None, method="xml", + xml_declaration=None, bint pretty_print=False, bint with_tail=True, + standalone=None, doctype=None, + # method='c14n' + bint exclusive=False, inclusive_ns_prefixes=None, + # method='c14n2' + bint with_comments=True, bint strip_text=False, + ): + """tostring(element_or_tree, encoding=None, method="xml", + xml_declaration=None, pretty_print=False, with_tail=True, + standalone=None, doctype=None, + exclusive=False, inclusive_ns_prefixes=None, + with_comments=True, strip_text=False, + ) + + Serialize an element to an encoded string representation of its XML + tree. + + Defaults to ASCII encoding without XML declaration. This + behaviour can be configured with the keyword arguments 'encoding' + (string) and 'xml_declaration' (bool). Note that changing the + encoding to a non UTF-8 compatible encoding will enable a + declaration by default. + + You can also serialise to a Unicode string without declaration by + passing the name ``'unicode'`` as encoding (or the ``str`` function + in Py3 or ``unicode`` in Py2). This changes the return value from + a byte string to an unencoded unicode string. + + The keyword argument 'pretty_print' (bool) enables formatted XML. + + The keyword argument 'method' selects the output method: 'xml', + 'html', plain 'text' (text content without tags), 'c14n' or 'c14n2'. + Default is 'xml'. + + With ``method="c14n"`` (C14N version 1), the options ``exclusive``, + ``with_comments`` and ``inclusive_ns_prefixes`` request exclusive + C14N, include comments, and list the inclusive prefixes respectively. + + With ``method="c14n2"`` (C14N version 2), the ``with_comments`` and + ``strip_text`` options control the output of comments and text space + according to C14N 2.0. + + Passing a boolean value to the ``standalone`` option will output + an XML declaration with the corresponding ``standalone`` flag. + + The ``doctype`` option allows passing in a plain string that will + be serialised before the XML tree. Note that passing in non + well-formed content here will make the XML output non well-formed. + Also, an existing doctype in the document tree will not be removed + when serialising an ElementTree instance. + + You can prevent the tail text of the element from being serialised + by passing the boolean ``with_tail`` option. This has no impact + on the tail text of children, which will always be serialised. + """ + cdef bint write_declaration + cdef int is_standalone + # C14N serialisation + if method in ('c14n', 'c14n2'): + if encoding is not None: + raise ValueError("Cannot specify encoding with C14N") + if xml_declaration: + raise ValueError("Cannot enable XML declaration in C14N") + if method == 'c14n': + return _tostringC14N(element_or_tree, exclusive, with_comments, inclusive_ns_prefixes) + else: + out = BytesIO() + target = C14NWriterTarget( + utf8_writer(out).write, + with_comments=with_comments, strip_text=strip_text) + _tree_to_target(element_or_tree, target) + return out.getvalue() + if not with_comments: + raise ValueError("Can only discard comments in C14N serialisation") + if strip_text: + raise ValueError("Can only strip text in C14N 2.0 serialisation") + if encoding is unicode or (encoding is not None and encoding.lower() == 'unicode'): + if xml_declaration: + raise ValueError, \ + "Serialisation to unicode must not request an XML declaration" + write_declaration = 0 + encoding = unicode + elif xml_declaration is None: + # by default, write an XML declaration only for non-standard encodings + write_declaration = encoding is not None and encoding.upper() not in \ + ('ASCII', 'UTF-8', 'UTF8', 'US-ASCII') + else: + write_declaration = xml_declaration + if encoding is None: + encoding = 'ASCII' + if standalone is None: + is_standalone = -1 + elif standalone: + write_declaration = 1 + is_standalone = 1 + else: + write_declaration = 1 + is_standalone = 0 + + if isinstance(element_or_tree, _Element): + return _tostring(<_Element>element_or_tree, encoding, doctype, method, + write_declaration, 0, pretty_print, with_tail, + is_standalone) + elif isinstance(element_or_tree, _ElementTree): + return _tostring((<_ElementTree>element_or_tree)._context_node, + encoding, doctype, method, write_declaration, 1, + pretty_print, with_tail, is_standalone) + else: + raise TypeError, f"Type '{python._fqtypename(element_or_tree).decode('utf8')}' cannot be serialized." + + + +def tostringlist(element_or_tree, *args, **kwargs): + """tostringlist(element_or_tree, *args, **kwargs) + + Serialize an element to an encoded string representation of its XML + tree, stored in a list of partial strings. + + This is purely for ElementTree 1.3 compatibility. The result is a + single string wrapped in a list. + """ + return [tostring(element_or_tree, *args, **kwargs)] + + +def tounicode(element_or_tree, *, method="xml", bint pretty_print=False, + bint with_tail=True, doctype=None): + """tounicode(element_or_tree, method="xml", pretty_print=False, + with_tail=True, doctype=None) + + Serialize an element to the Python unicode representation of its XML + tree. + + :deprecated: use ``tostring(el, encoding='unicode')`` instead. + + Note that the result does not carry an XML encoding declaration and is + therefore not necessarily suited for serialization to byte streams without + further treatment. + + The boolean keyword argument 'pretty_print' enables formatted XML. + + The keyword argument 'method' selects the output method: 'xml', + 'html' or plain 'text'. + + You can prevent the tail text of the element from being serialised + by passing the boolean ``with_tail`` option. This has no impact + on the tail text of children, which will always be serialised. + """ + if isinstance(element_or_tree, _Element): + return _tostring(<_Element>element_or_tree, unicode, doctype, method, + 0, 0, pretty_print, with_tail, -1) + elif isinstance(element_or_tree, _ElementTree): + return _tostring((<_ElementTree>element_or_tree)._context_node, + unicode, doctype, method, 0, 1, pretty_print, + with_tail, -1) + else: + raise TypeError, f"Type '{type(element_or_tree)}' cannot be serialized." + + +def parse(source, _BaseParser parser=None, *, base_url=None): + """parse(source, parser=None, base_url=None) + + Return an ElementTree object loaded with source elements. If no parser + is provided as second argument, the default parser is used. + + The ``source`` can be any of the following: + + - a file name/path + - a file object + - a file-like object + - a URL using the HTTP or FTP protocol + + To parse from a string, use the ``fromstring()`` function instead. + + Note that it is generally faster to parse from a file path or URL + than from an open file object or file-like object. Transparent + decompression from gzip compressed sources is supported (unless + explicitly disabled in libxml2). + + The ``base_url`` keyword allows setting a URL for the document + when parsing from a file-like object. This is needed when looking + up external entities (DTD, XInclude, ...) with relative paths. + """ + cdef _Document doc + try: + doc = _parseDocument(source, parser, base_url) + return _elementTreeFactory(doc, None) + except _TargetParserResult as result_container: + return result_container.result + + +def adopt_external_document(capsule, _BaseParser parser=None): + """adopt_external_document(capsule, parser=None) + + Unpack a libxml2 document pointer from a PyCapsule and wrap it in an + lxml ElementTree object. + + This allows external libraries to build XML/HTML trees using libxml2 + and then pass them efficiently into lxml for further processing. + + If a ``parser`` is provided, it will be used for configuring the + lxml document. No parsing will be done. + + The capsule must have the name ``"libxml2:xmlDoc"`` and its pointer + value must reference a correct libxml2 document of type ``xmlDoc*``. + The creator of the capsule must take care to correctly clean up the + document using an appropriate capsule destructor. By default, the + libxml2 document will be copied to let lxml safely own the memory + of the internal tree that it uses. + + If the capsule context is non-NULL, it must point to a C string that + can be compared using ``strcmp()``. If the context string equals + ``"destructor:xmlFreeDoc"``, the libxml2 document will not be copied + but the capsule invalidated instead by clearing its destructor and + name. That way, lxml takes ownership of the libxml2 document in memory + without creating a copy first, and the capsule destructor will not be + called. The document will then eventually be cleaned up by lxml using + the libxml2 API function ``xmlFreeDoc()`` once it is no longer used. + + If no copy is made, later modifications of the tree outside of lxml + should not be attempted after transferring the ownership. + """ + cdef xmlDoc* c_doc + cdef bint is_owned = False + c_doc = <xmlDoc*> python.lxml_unpack_xmldoc_capsule(capsule, &is_owned) + doc = _adoptForeignDoc(c_doc, parser, is_owned) + return _elementTreeFactory(doc, None) + + +################################################################################ +# Include submodules + +include "readonlytree.pxi" # Read-only implementation of Element proxies +include "classlookup.pxi" # Element class lookup mechanisms +include "nsclasses.pxi" # Namespace implementation and registry +include "docloader.pxi" # Support for custom document loaders +include "parser.pxi" # XML and HTML parsers +include "saxparser.pxi" # SAX-like Parser interface and tree builder +include "parsertarget.pxi" # ET Parser target +include "serializer.pxi" # XML output functions +include "iterparse.pxi" # incremental XML parsing +include "xmlid.pxi" # XMLID and IDDict +include "xinclude.pxi" # XInclude +include "cleanup.pxi" # Cleanup and recursive element removal functions + + +################################################################################ +# Include submodules for XPath and XSLT + +include "extensions.pxi" # XPath/XSLT extension functions +include "xpath.pxi" # XPath evaluation +include "xslt.pxi" # XSL transformations +include "xsltext.pxi" # XSL extension elements + + +################################################################################ +# Validation + +cdef class DocumentInvalid(LxmlError): + """Validation error. + + Raised by all document validators when their ``assertValid(tree)`` + method fails. + """ + + +cdef class _Validator: + "Base class for XML validators." + cdef _ErrorLog _error_log + def __cinit__(self): + self._error_log = _ErrorLog() + + def validate(self, etree): + """validate(self, etree) + + Validate the document using this schema. + + Returns true if document is valid, false if not. + """ + return self(etree) + + def assertValid(self, etree): + """assertValid(self, etree) + + Raises `DocumentInvalid` if the document does not comply with the schema. + """ + if not self(etree): + raise DocumentInvalid(self._error_log._buildExceptionMessage( + "Document does not comply with schema"), + self._error_log) + + def assert_(self, etree): + """assert_(self, etree) + + Raises `AssertionError` if the document does not comply with the schema. + """ + if not self(etree): + raise AssertionError, self._error_log._buildExceptionMessage( + "Document does not comply with schema") + + cpdef _append_log_message(self, int domain, int type, int level, int line, + message, filename): + self._error_log._receiveGeneric(domain, type, level, line, message, + filename) + + cpdef _clear_error_log(self): + self._error_log.clear() + + @property + def error_log(self): + """The log of validation errors and warnings.""" + assert self._error_log is not None, "XPath evaluator not initialised" + return self._error_log.copy() + +include "dtd.pxi" # DTD +include "relaxng.pxi" # RelaxNG +include "xmlschema.pxi" # XMLSchema +include "schematron.pxi" # Schematron (requires libxml2 2.6.21+) + +################################################################################ +# Public C API + +include "public-api.pxi" + +################################################################################ +# Other stuff + +include "debug.pxi" diff --git a/.venv/lib/python3.12/site-packages/lxml/etree_api.h b/.venv/lib/python3.12/site-packages/lxml/etree_api.h new file mode 100644 index 00000000..1c9b4616 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/etree_api.h @@ -0,0 +1,195 @@ +/* Generated by Cython 3.0.11 */ + +#ifndef __PYX_HAVE_API__lxml__etree +#define __PYX_HAVE_API__lxml__etree +#ifdef __MINGW64__ +#define MS_WIN64 +#endif +#include "Python.h" +#include "etree.h" + +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0; +#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument +static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0; +#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory +static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0; +#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree +static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0; +#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0; +#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0; +#define makeElement __pyx_api_f_4lxml_5etree_makeElement +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0; +#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement +static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0; +#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction +static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0; +#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass +static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0; +#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass +static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0; +#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback +static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches +static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0; +#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0; +#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise +static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0; +#define hasText __pyx_api_f_4lxml_5etree_hasText +static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0; +#define hasTail __pyx_api_f_4lxml_5etree_hasTail +static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0; +#define textOf __pyx_api_f_4lxml_5etree_textOf +static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0; +#define tailOf __pyx_api_f_4lxml_5etree_tailOf +static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0; +#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText +static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0; +#define setTailText __pyx_api_f_4lxml_5etree_setTailText +static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0; +#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue +static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName +static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0; +#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue +static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0; +#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes +static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0; +#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes +static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0; +#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue +static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0; +#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute +static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName +static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0; +#define hasChild __pyx_api_f_4lxml_5etree_hasChild +static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0; +#define findChild __pyx_api_f_4lxml_5etree_findChild +static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0; +#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards +static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0; +#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards +static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0; +#define nextElement __pyx_api_f_4lxml_5etree_nextElement +static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0; +#define previousElement __pyx_api_f_4lxml_5etree_previousElement +static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0; +#define appendChild __pyx_api_f_4lxml_5etree_appendChild +static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0; +#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement +static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0; +#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode +static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0; +#define utf8 __pyx_api_f_4lxml_5etree_utf8 +static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0; +#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag +static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0; +#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs +static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0; +#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName +static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0; +#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName +static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0; +#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext +static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0; +#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch +static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix +#ifndef __PYX_HAVE_RT_ImportFunction_3_0_11 +#define __PYX_HAVE_RT_ImportFunction_3_0_11 +static int __Pyx_ImportFunction_3_0_11(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { + PyObject *d = 0; + PyObject *cobj = 0; + union { + void (*fp)(void); + void *p; + } tmp; + d = PyObject_GetAttrString(module, (char *)"__pyx_capi__"); + if (!d) + goto bad; + cobj = PyDict_GetItemString(d, funcname); + if (!cobj) { + PyErr_Format(PyExc_ImportError, + "%.200s does not export expected C function %.200s", + PyModule_GetName(module), funcname); + goto bad; + } + if (!PyCapsule_IsValid(cobj, sig)) { + PyErr_Format(PyExc_TypeError, + "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj)); + goto bad; + } + tmp.p = PyCapsule_GetPointer(cobj, sig); + *f = tmp.fp; + if (!(*f)) + goto bad; + Py_DECREF(d); + return 0; +bad: + Py_XDECREF(d); + return -1; +} +#endif + + +static int import_lxml__etree(void) { + PyObject *module = 0; + module = PyImport_ImportModule("lxml.etree"); + if (!module) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + Py_DECREF(module); module = 0; + return 0; + bad: + Py_XDECREF(module); + return -1; +} + +#endif /* !__PYX_HAVE_API__lxml__etree */ diff --git a/.venv/lib/python3.12/site-packages/lxml/extensions.pxi b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi new file mode 100644 index 00000000..2a2c94ec --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/extensions.pxi @@ -0,0 +1,833 @@ +# support for extension functions in XPath and XSLT + +cdef class XPathError(LxmlError): + """Base class of all XPath errors. + """ + +cdef class XPathEvalError(XPathError): + """Error during XPath evaluation. + """ + +cdef class XPathFunctionError(XPathEvalError): + """Internal error looking up an XPath extension function. + """ + +cdef class XPathResultError(XPathEvalError): + """Error handling an XPath result. + """ + + +# forward declarations + +ctypedef int (*_register_function)(void* ctxt, name_utf, ns_uri_utf) +cdef class _ExsltRegExp + +################################################################################ +# Base class for XSLT and XPath evaluation contexts: functions, namespaces, ... + +@cython.internal +cdef class _BaseContext: + cdef xpath.xmlXPathContext* _xpathCtxt + cdef _Document _doc + cdef dict _extensions + cdef list _namespaces + cdef list _global_namespaces + cdef dict _utf_refs + cdef dict _function_cache + cdef dict _eval_context_dict + cdef bint _build_smart_strings + # for exception handling and temporary reference keeping: + cdef _TempStore _temp_refs + cdef set _temp_documents + cdef _ExceptionContext _exc + cdef _ErrorLog _error_log + + def __cinit__(self): + self._xpathCtxt = NULL + + def __init__(self, namespaces, extensions, error_log, enable_regexp, + build_smart_strings): + cdef _ExsltRegExp _regexp + cdef dict new_extensions + cdef list ns + self._utf_refs = {} + self._global_namespaces = [] + self._function_cache = {} + self._eval_context_dict = None + self._error_log = error_log + + if extensions is not None: + # convert extensions to UTF-8 + if isinstance(extensions, dict): + extensions = (extensions,) + # format: [ {(ns, name):function} ] -> {(ns_utf, name_utf):function} + new_extensions = {} + for extension in extensions: + for (ns_uri, name), function in extension.items(): + if name is None: + raise ValueError, "extensions must have non empty names" + ns_utf = self._to_utf(ns_uri) + name_utf = self._to_utf(name) + new_extensions[(ns_utf, name_utf)] = function + extensions = new_extensions or None + + if namespaces is not None: + if isinstance(namespaces, dict): + namespaces = namespaces.items() + if namespaces: + ns = [] + for prefix, ns_uri in namespaces: + if prefix is None or not prefix: + raise TypeError, \ + "empty namespace prefix is not supported in XPath" + if ns_uri is None or not ns_uri: + raise TypeError, \ + "setting default namespace is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + ns.append( (prefix_utf, ns_uri_utf) ) + namespaces = ns + else: + namespaces = None + + self._doc = None + self._exc = _ExceptionContext() + self._extensions = extensions + self._namespaces = namespaces + self._temp_refs = _TempStore() + self._temp_documents = set() + self._build_smart_strings = build_smart_strings + + if enable_regexp: + _regexp = _ExsltRegExp() + _regexp._register_in_context(self) + + cdef _BaseContext _copy(self): + cdef _BaseContext context + if self._namespaces is not None: + namespaces = self._namespaces[:] + else: + namespaces = None + context = self.__class__(namespaces, None, self._error_log, False, + self._build_smart_strings) + if self._extensions is not None: + context._extensions = self._extensions.copy() + return context + + cdef bytes _to_utf(self, s): + "Convert to UTF-8 and keep a reference to the encoded string" + cdef python.PyObject* dict_result + if s is None: + return None + dict_result = python.PyDict_GetItem(self._utf_refs, s) + if dict_result is not NULL: + return <bytes>dict_result + utf = _utf8(s) + self._utf_refs[s] = utf + if python.IS_PYPY: + # use C level refs, PyPy refs are not enough! + python.Py_INCREF(utf) + return utf + + cdef void _set_xpath_context(self, xpath.xmlXPathContext* xpathCtxt) noexcept: + self._xpathCtxt = xpathCtxt + xpathCtxt.userData = <void*>self + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + xpathCtxt.error = <xmlerror.xmlStructuredErrorFunc> _receiveXPathError + + @cython.final + cdef _register_context(self, _Document doc): + self._doc = doc + self._exc.clear() + + @cython.final + cdef _cleanup_context(self): + #xpath.xmlXPathRegisteredNsCleanup(self._xpathCtxt) + #self.unregisterGlobalNamespaces() + if python.IS_PYPY: + # clean up double refs in PyPy (see "_to_utf()" method) + for ref in self._utf_refs.itervalues(): + python.Py_DECREF(ref) + self._utf_refs.clear() + self._eval_context_dict = None + self._doc = None + + @cython.final + cdef _release_context(self): + if self._xpathCtxt is not NULL: + self._xpathCtxt.userData = NULL + self._xpathCtxt = NULL + + # namespaces (internal UTF-8 methods with leading '_') + + cdef addNamespace(self, prefix, ns_uri): + cdef list namespaces + if prefix is None: + raise TypeError, "empty prefix is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + new_item = (prefix_utf, ns_uri_utf) + if self._namespaces is None: + self._namespaces = [new_item] + else: + namespaces = [] + for item in self._namespaces: + if item[0] == prefix_utf: + item = new_item + new_item = None + namespaces.append(item) + if new_item is not None: + namespaces.append(new_item) + self._namespaces = namespaces + if self._xpathCtxt is not NULL: + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef registerNamespace(self, prefix, ns_uri): + if prefix is None: + raise TypeError, "empty prefix is not supported in XPath" + prefix_utf = self._to_utf(prefix) + ns_uri_utf = self._to_utf(ns_uri) + self._global_namespaces.append(prefix_utf) + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef registerLocalNamespaces(self): + if self._namespaces is None: + return + for prefix_utf, ns_uri_utf in self._namespaces: + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef registerGlobalNamespaces(self): + cdef list ns_prefixes = _find_all_extension_prefixes() + if python.PyList_GET_SIZE(ns_prefixes) > 0: + for prefix_utf, ns_uri_utf in ns_prefixes: + self._global_namespaces.append(prefix_utf) + xpath.xmlXPathRegisterNs( + self._xpathCtxt, _xcstr(prefix_utf), _xcstr(ns_uri_utf)) + + cdef unregisterGlobalNamespaces(self): + if python.PyList_GET_SIZE(self._global_namespaces) > 0: + for prefix_utf in self._global_namespaces: + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _xcstr(prefix_utf), NULL) + del self._global_namespaces[:] + + cdef void _unregisterNamespace(self, prefix_utf) noexcept: + xpath.xmlXPathRegisterNs(self._xpathCtxt, + _xcstr(prefix_utf), NULL) + + # extension functions + + cdef int _addLocalExtensionFunction(self, ns_utf, name_utf, function) except -1: + if self._extensions is None: + self._extensions = {} + self._extensions[(ns_utf, name_utf)] = function + return 0 + + cdef registerGlobalFunctions(self, void* ctxt, + _register_function reg_func): + cdef python.PyObject* dict_result + cdef dict d + for ns_utf, ns_functions in __FUNCTION_NAMESPACE_REGISTRIES.iteritems(): + dict_result = python.PyDict_GetItem( + self._function_cache, ns_utf) + if dict_result is not NULL: + d = <dict>dict_result + else: + d = {} + self._function_cache[ns_utf] = d + for name_utf, function in ns_functions.iteritems(): + d[name_utf] = function + reg_func(ctxt, name_utf, ns_utf) + + cdef registerLocalFunctions(self, void* ctxt, + _register_function reg_func): + cdef python.PyObject* dict_result + cdef dict d + if self._extensions is None: + return # done + last_ns = None + d = None + for (ns_utf, name_utf), function in self._extensions.iteritems(): + if ns_utf is not last_ns or d is None: + last_ns = ns_utf + dict_result = python.PyDict_GetItem( + self._function_cache, ns_utf) + if dict_result is not NULL: + d = <dict>dict_result + else: + d = {} + self._function_cache[ns_utf] = d + d[name_utf] = function + reg_func(ctxt, name_utf, ns_utf) + + cdef unregisterAllFunctions(self, void* ctxt, + _register_function unreg_func): + for ns_utf, functions in self._function_cache.iteritems(): + for name_utf in functions: + unreg_func(ctxt, name_utf, ns_utf) + + cdef unregisterGlobalFunctions(self, void* ctxt, + _register_function unreg_func): + for ns_utf, functions in self._function_cache.items(): + for name_utf in functions: + if self._extensions is None or \ + (ns_utf, name_utf) not in self._extensions: + unreg_func(ctxt, name_utf, ns_utf) + + @cython.final + cdef _find_cached_function(self, const_xmlChar* c_ns_uri, const_xmlChar* c_name): + """Lookup an extension function in the cache and return it. + + Parameters: c_ns_uri may be NULL, c_name must not be NULL + """ + cdef python.PyObject* c_dict + cdef python.PyObject* dict_result + c_dict = python.PyDict_GetItem( + self._function_cache, None if c_ns_uri is NULL else c_ns_uri) + if c_dict is not NULL: + dict_result = python.PyDict_GetItem( + <object>c_dict, <unsigned char*>c_name) + if dict_result is not NULL: + return <object>dict_result + return None + + # Python access to the XPath context for extension functions + + @property + def context_node(self): + cdef xmlNode* c_node + if self._xpathCtxt is NULL: + raise XPathError, \ + "XPath context is only usable during the evaluation" + c_node = self._xpathCtxt.node + if c_node is NULL: + raise XPathError, "no context node" + if c_node.doc != self._xpathCtxt.doc: + raise XPathError, \ + "document-external context nodes are not supported" + if self._doc is None: + raise XPathError, "document context is missing" + return _elementFactory(self._doc, c_node) + + @property + def eval_context(self): + if self._eval_context_dict is None: + self._eval_context_dict = {} + return self._eval_context_dict + + # Python reference keeping during XPath function evaluation + + @cython.final + cdef _release_temp_refs(self): + "Free temporarily referenced objects from this context." + self._temp_refs.clear() + self._temp_documents.clear() + + @cython.final + cdef _hold(self, obj): + """A way to temporarily hold references to nodes in the evaluator. + + This is needed because otherwise nodes created in XPath extension + functions would be reference counted too soon, during the XPath + evaluation. This is most important in the case of exceptions. + """ + cdef _Element element + if isinstance(obj, _Element): + self._temp_refs.add(obj) + self._temp_documents.add((<_Element>obj)._doc) + return + elif _isString(obj) or not python.PySequence_Check(obj): + return + for o in obj: + if isinstance(o, _Element): + #print "Holding element:", <int>element._c_node + self._temp_refs.add(o) + #print "Holding document:", <int>element._doc._c_doc + self._temp_documents.add((<_Element>o)._doc) + + @cython.final + cdef _Document _findDocumentForNode(self, xmlNode* c_node): + """If an XPath expression returns an element from a different + document than the current context document, we call this to + see if it was possibly created by an extension and is a known + document instance. + """ + cdef _Document doc + for doc in self._temp_documents: + if doc is not None and doc._c_doc is c_node.doc: + return doc + return None + + +# libxml2 keeps these error messages in a static array in its code +# and doesn't give us access to them ... + +cdef tuple LIBXML2_XPATH_ERROR_MESSAGES = ( + b"Ok", + b"Number encoding", + b"Unfinished literal", + b"Start of literal", + b"Expected $ for variable reference", + b"Undefined variable", + b"Invalid predicate", + b"Invalid expression", + b"Missing closing curly brace", + b"Unregistered function", + b"Invalid operand", + b"Invalid type", + b"Invalid number of arguments", + b"Invalid context size", + b"Invalid context position", + b"Memory allocation error", + b"Syntax error", + b"Resource error", + b"Sub resource error", + b"Undefined namespace prefix", + b"Encoding error", + b"Char out of XML range", + b"Invalid or incomplete context", + b"Stack usage error", + b"Forbidden variable\n", + b"?? Unknown error ??\n", +) + +cdef void _forwardXPathError(void* c_ctxt, const xmlerror.xmlError* c_error) noexcept with gil: + cdef xmlerror.xmlError error + cdef int xpath_code + if c_error.message is not NULL: + error.message = c_error.message + else: + xpath_code = c_error.code - xmlerror.XML_XPATH_EXPRESSION_OK + if 0 <= xpath_code < len(LIBXML2_XPATH_ERROR_MESSAGES): + error.message = _cstr(LIBXML2_XPATH_ERROR_MESSAGES[xpath_code]) + else: + error.message = b"unknown error" + error.domain = c_error.domain + error.code = c_error.code + error.level = c_error.level + error.line = c_error.line + error.int2 = c_error.int1 # column + error.file = c_error.file + error.node = NULL + + (<_BaseContext>c_ctxt)._error_log._receive(&error) + +cdef void _receiveXPathError(void* c_context, const xmlerror.xmlError* error) noexcept nogil: + if not __DEBUG: + return + if c_context is NULL: + _forwardError(NULL, error) + else: + _forwardXPathError(c_context, error) + + +def Extension(module, function_mapping=None, *, ns=None): + """Extension(module, function_mapping=None, ns=None) + + Build a dictionary of extension functions from the functions + defined in a module or the methods of an object. + + As second argument, you can pass an additional mapping of + attribute names to XPath function names, or a list of function + names that should be taken. + + The ``ns`` keyword argument accepts a namespace URI for the XPath + functions. + """ + cdef dict functions = {} + if isinstance(function_mapping, dict): + for function_name, xpath_name in function_mapping.items(): + functions[(ns, xpath_name)] = getattr(module, function_name) + else: + if function_mapping is None: + function_mapping = [ name for name in dir(module) + if not name.startswith('_') ] + for function_name in function_mapping: + functions[(ns, function_name)] = getattr(module, function_name) + return functions + +################################################################################ +# EXSLT regexp implementation + +@cython.final +@cython.internal +cdef class _ExsltRegExp: + cdef dict _compile_map + def __cinit__(self): + self._compile_map = {} + + cdef _make_string(self, value): + if _isString(value): + return value + elif isinstance(value, list): + # node set: take recursive text concatenation of first element + if python.PyList_GET_SIZE(value) == 0: + return '' + firstnode = value[0] + if _isString(firstnode): + return firstnode + elif isinstance(firstnode, _Element): + c_text = tree.xmlNodeGetContent((<_Element>firstnode)._c_node) + if c_text is NULL: + raise MemoryError() + try: + return funicode(c_text) + finally: + tree.xmlFree(c_text) + else: + return unicode(firstnode) + else: + return unicode(value) + + cdef _compile(self, rexp, ignore_case): + cdef python.PyObject* c_result + rexp = self._make_string(rexp) + key = (rexp, ignore_case) + c_result = python.PyDict_GetItem(self._compile_map, key) + if c_result is not NULL: + return <object>c_result + py_flags = re.UNICODE + if ignore_case: + py_flags = py_flags | re.IGNORECASE + rexp_compiled = re.compile(rexp, py_flags) + self._compile_map[key] = rexp_compiled + return rexp_compiled + + def test(self, ctxt, s, rexp, flags=''): + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if rexpc.search(s) is None: + return False + else: + return True + + def match(self, ctxt, s, rexp, flags=''): + cdef list result_list + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + if 'g' in flags: + results = rexpc.findall(s) + if not results: + return () + else: + result = rexpc.search(s) + if not result: + return () + results = [ result.group() ] + results.extend( result.groups('') ) + result_list = [] + root = Element('matches') + for s_match in results: + if python.PyTuple_CheckExact(s_match): + s_match = ''.join(s_match) + elem = SubElement(root, 'match') + elem.text = s_match + result_list.append(elem) + return result_list + + def replace(self, ctxt, s, rexp, flags, replacement): + replacement = self._make_string(replacement) + flags = self._make_string(flags) + s = self._make_string(s) + rexpc = self._compile(rexp, 'i' in flags) + count: object = 0 if 'g' in flags else 1 + return rexpc.sub(replacement, s, count) + + cdef _register_in_context(self, _BaseContext context): + ns = b"http://exslt.org/regular-expressions" + context._addLocalExtensionFunction(ns, b"test", self.test) + context._addLocalExtensionFunction(ns, b"match", self.match) + context._addLocalExtensionFunction(ns, b"replace", self.replace) + + +################################################################################ +# helper functions + +cdef xpath.xmlXPathObject* _wrapXPathObject(object obj, _Document doc, + _BaseContext context) except NULL: + cdef xpath.xmlNodeSet* resultSet + cdef _Element fake_node = None + cdef xmlNode* c_node + + if isinstance(obj, unicode): + obj = _utf8(obj) + if isinstance(obj, bytes): + # libxml2 copies the string value + return xpath.xmlXPathNewCString(_cstr(obj)) + if isinstance(obj, bool): + return xpath.xmlXPathNewBoolean(obj) + if python.PyNumber_Check(obj): + return xpath.xmlXPathNewFloat(obj) + if obj is None: + resultSet = xpath.xmlXPathNodeSetCreate(NULL) + elif isinstance(obj, _Element): + resultSet = xpath.xmlXPathNodeSetCreate((<_Element>obj)._c_node) + elif python.PySequence_Check(obj): + resultSet = xpath.xmlXPathNodeSetCreate(NULL) + try: + for value in obj: + if isinstance(value, _Element): + if context is not None: + context._hold(value) + xpath.xmlXPathNodeSetAdd(resultSet, (<_Element>value)._c_node) + else: + if context is None or doc is None: + raise XPathResultError, \ + f"Non-Element values not supported at this point - got {value!r}" + # support strings by appending text nodes to an Element + if isinstance(value, unicode): + value = _utf8(value) + if isinstance(value, bytes): + if fake_node is None: + fake_node = _makeElement("text-root", NULL, doc, None, + None, None, None, None, None) + context._hold(fake_node) + else: + # append a comment node to keep the text nodes separate + c_node = tree.xmlNewDocComment(doc._c_doc, <unsigned char*>"") + if c_node is NULL: + raise MemoryError() + tree.xmlAddChild(fake_node._c_node, c_node) + context._hold(value) + c_node = tree.xmlNewDocText(doc._c_doc, _xcstr(value)) + if c_node is NULL: + raise MemoryError() + tree.xmlAddChild(fake_node._c_node, c_node) + xpath.xmlXPathNodeSetAdd(resultSet, c_node) + else: + raise XPathResultError, \ + f"This is not a supported node-set result: {value!r}" + except: + xpath.xmlXPathFreeNodeSet(resultSet) + raise + else: + raise XPathResultError, f"Unknown return type: {python._fqtypename(obj).decode('utf8')}" + return xpath.xmlXPathWrapNodeSet(resultSet) + +cdef object _unwrapXPathObject(xpath.xmlXPathObject* xpathObj, + _Document doc, _BaseContext context): + if xpathObj.type == xpath.XPATH_UNDEFINED: + raise XPathResultError, "Undefined xpath result" + elif xpathObj.type == xpath.XPATH_NODESET: + return _createNodeSetResult(xpathObj, doc, context) + elif xpathObj.type == xpath.XPATH_BOOLEAN: + return xpathObj.boolval + elif xpathObj.type == xpath.XPATH_NUMBER: + return xpathObj.floatval + elif xpathObj.type == xpath.XPATH_STRING: + stringval = funicode(xpathObj.stringval) + if context._build_smart_strings: + stringval = _elementStringResultFactory( + stringval, None, None, False) + return stringval + elif xpathObj.type == xpath.XPATH_POINT: + raise NotImplementedError, "XPATH_POINT" + elif xpathObj.type == xpath.XPATH_RANGE: + raise NotImplementedError, "XPATH_RANGE" + elif xpathObj.type == xpath.XPATH_LOCATIONSET: + raise NotImplementedError, "XPATH_LOCATIONSET" + elif xpathObj.type == xpath.XPATH_USERS: + raise NotImplementedError, "XPATH_USERS" + elif xpathObj.type == xpath.XPATH_XSLT_TREE: + return _createNodeSetResult(xpathObj, doc, context) + else: + raise XPathResultError, f"Unknown xpath result {xpathObj.type}" + +cdef object _createNodeSetResult(xpath.xmlXPathObject* xpathObj, _Document doc, + _BaseContext context): + cdef xmlNode* c_node + cdef int i + cdef list result + result = [] + if xpathObj.nodesetval is NULL: + return result + for i in range(xpathObj.nodesetval.nodeNr): + c_node = xpathObj.nodesetval.nodeTab[i] + _unpackNodeSetEntry(result, c_node, doc, context, + xpathObj.type == xpath.XPATH_XSLT_TREE) + return result + +cdef _unpackNodeSetEntry(list results, xmlNode* c_node, _Document doc, + _BaseContext context, bint is_fragment): + cdef xmlNode* c_child + if _isElement(c_node): + if c_node.doc != doc._c_doc and c_node.doc._private is NULL: + # XXX: works, but maybe not always the right thing to do? + # XPath: only runs when extensions create or copy trees + # -> we store Python refs to these, so that is OK + # XSLT: can it leak when merging trees from multiple sources? + c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + # FIXME: call _instantiateElementFromXPath() instead? + results.append( + _fakeDocElementFactory(doc, c_node)) + elif c_node.type == tree.XML_TEXT_NODE or \ + c_node.type == tree.XML_CDATA_SECTION_NODE or \ + c_node.type == tree.XML_ATTRIBUTE_NODE: + results.append( + _buildElementStringResult(doc, c_node, context)) + elif c_node.type == tree.XML_NAMESPACE_DECL: + results.append( (funicodeOrNone((<xmlNs*>c_node).prefix), + funicodeOrNone((<xmlNs*>c_node).href)) ) + elif c_node.type == tree.XML_DOCUMENT_NODE or \ + c_node.type == tree.XML_HTML_DOCUMENT_NODE: + # ignored for everything but result tree fragments + if is_fragment: + c_child = c_node.children + while c_child is not NULL: + _unpackNodeSetEntry(results, c_child, doc, context, 0) + c_child = c_child.next + elif c_node.type == tree.XML_XINCLUDE_START or \ + c_node.type == tree.XML_XINCLUDE_END: + pass + else: + raise NotImplementedError, \ + f"Not yet implemented result node type: {c_node.type}" + +cdef void _freeXPathObject(xpath.xmlXPathObject* xpathObj) noexcept: + """Free the XPath object, but *never* free the *content* of node sets. + Python dealloc will do that for us. + """ + if xpathObj.nodesetval is not NULL: + xpath.xmlXPathFreeNodeSet(xpathObj.nodesetval) + xpathObj.nodesetval = NULL + xpath.xmlXPathFreeObject(xpathObj) + +cdef _Element _instantiateElementFromXPath(xmlNode* c_node, _Document doc, + _BaseContext context): + # NOTE: this may copy the element - only call this when it can't leak + if c_node.doc != doc._c_doc and c_node.doc._private is NULL: + # not from the context document and not from a fake document + # either => may still be from a known document, e.g. one + # created by an extension function + node_doc = context._findDocumentForNode(c_node) + if node_doc is None: + # not from a known document at all! => can only make a + # safety copy here + c_node = tree.xmlDocCopyNode(c_node, doc._c_doc, 1) + else: + doc = node_doc + return _fakeDocElementFactory(doc, c_node) + +################################################################################ +# special str/unicode subclasses + +@cython.final +cdef class _ElementUnicodeResult(unicode): + cdef _Element _parent + cdef readonly object attrname + cdef readonly bint is_tail + + def getparent(self): + return self._parent + + @property + def is_text(self): + return self._parent is not None and not (self.is_tail or self.attrname is not None) + + @property + def is_attribute(self): + return self.attrname is not None + +cdef object _elementStringResultFactory(string_value, _Element parent, + attrname, bint is_tail): + result = _ElementUnicodeResult(string_value) + result._parent = parent + result.is_tail = is_tail + result.attrname = attrname + return result + +cdef object _buildElementStringResult(_Document doc, xmlNode* c_node, + _BaseContext context): + cdef _Element parent = None + cdef object attrname = None + cdef xmlNode* c_element + cdef bint is_tail + + if c_node.type == tree.XML_ATTRIBUTE_NODE: + attrname = _namespacedName(c_node) + is_tail = 0 + s = tree.xmlNodeGetContent(c_node) + try: + value = funicode(s) + finally: + tree.xmlFree(s) + c_element = NULL + else: + #assert c_node.type == tree.XML_TEXT_NODE or c_node.type == tree.XML_CDATA_SECTION_NODE, "invalid node type" + # may be tail text or normal text + value = funicode(c_node.content) + c_element = _previousElement(c_node) + is_tail = c_element is not NULL + + if not context._build_smart_strings: + return value + + if c_element is NULL: + # non-tail text or attribute text + c_element = c_node.parent + while c_element is not NULL and not _isElement(c_element): + c_element = c_element.parent + + if c_element is not NULL: + parent = _instantiateElementFromXPath(c_element, doc, context) + + return _elementStringResultFactory( + value, parent, attrname, is_tail) + +################################################################################ +# callbacks for XPath/XSLT extension functions + +cdef void _extension_function_call(_BaseContext context, function, + xpath.xmlXPathParserContext* ctxt, int nargs) noexcept: + cdef _Document doc + cdef xpath.xmlXPathObject* obj + cdef list args + cdef int i + doc = context._doc + try: + args = [] + for i in range(nargs): + obj = xpath.valuePop(ctxt) + o = _unwrapXPathObject(obj, doc, context) + _freeXPathObject(obj) + args.append(o) + args.reverse() + + res = function(context, *args) + # wrap result for XPath consumption + obj = _wrapXPathObject(res, doc, context) + # prevent Python from deallocating elements handed to libxml2 + context._hold(res) + xpath.valuePush(ctxt, obj) + except: + xpath.xmlXPathErr(ctxt, xpath.XPATH_EXPR_ERROR) + context._exc._store_raised() + finally: + return # swallow any further exceptions + +# lookup the function by name and call it + +cdef void _xpath_function_call(xpath.xmlXPathParserContext* ctxt, + int nargs) noexcept with gil: + cdef _BaseContext context + cdef xpath.xmlXPathContext* rctxt = ctxt.context + context = <_BaseContext> rctxt.userData + try: + function = context._find_cached_function(rctxt.functionURI, rctxt.function) + if function is not None: + _extension_function_call(context, function, ctxt, nargs) + else: + xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR) + context._exc._store_exception(XPathFunctionError( + f"XPath function '{_namespacedNameFromNsName(rctxt.functionURI, rctxt.function)}' not found")) + except: + # may not be the right error, but we need to tell libxml2 *something* + xpath.xmlXPathErr(ctxt, xpath.XPATH_UNKNOWN_FUNC_ERROR) + context._exc._store_raised() + finally: + return # swallow any further exceptions diff --git a/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py b/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py new file mode 100644 index 00000000..c35365d0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py @@ -0,0 +1,10 @@ +__doc__ = """Legacy interface to the BeautifulSoup HTML parser. +""" + +__all__ = ["parse", "convert_tree"] + +from .soupparser import convert_tree, parse as _parse + +def parse(file, beautifulsoup=None, makeelement=None): + root = _parse(file, beautifulsoup=beautifulsoup, makeelement=makeelement) + return root.getroot() diff --git a/.venv/lib/python3.12/site-packages/lxml/html/__init__.py b/.venv/lib/python3.12/site-packages/lxml/html/__init__.py new file mode 100644 index 00000000..ec55d678 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/__init__.py @@ -0,0 +1,1923 @@ +# Copyright (c) 2004 Ian Bicking. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# 3. Neither the name of Ian Bicking nor the names of its contributors may +# be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL IAN BICKING OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +"""The ``lxml.html`` tool set for HTML handling. +""" + + +__all__ = [ + 'document_fromstring', 'fragment_fromstring', 'fragments_fromstring', 'fromstring', + 'tostring', 'Element', 'defs', 'open_in_browser', 'submit_form', + 'find_rel_links', 'find_class', 'make_links_absolute', + 'resolve_base_href', 'iterlinks', 'rewrite_links', 'parse'] + + +import copy +import re + +from collections.abc import MutableMapping, MutableSet +from functools import partial +from urllib.parse import urljoin + +from .. import etree +from . import defs +from ._setmixin import SetMixin + + +def __fix_docstring(s): + # TODO: remove and clean up doctests + if not s: + return s + sub = re.compile(r"^(\s*)u'", re.M).sub + return sub(r"\1'", s) + + +XHTML_NAMESPACE = "http://www.w3.org/1999/xhtml" + +_rel_links_xpath = etree.XPath("descendant-or-self::a[@rel]|descendant-or-self::x:a[@rel]", + namespaces={'x':XHTML_NAMESPACE}) +_options_xpath = etree.XPath("descendant-or-self::option|descendant-or-self::x:option", + namespaces={'x':XHTML_NAMESPACE}) +_forms_xpath = etree.XPath("descendant-or-self::form|descendant-or-self::x:form", + namespaces={'x':XHTML_NAMESPACE}) +#_class_xpath = etree.XPath(r"descendant-or-self::*[regexp:match(@class, concat('\b', $class_name, '\b'))]", {'regexp': 'http://exslt.org/regular-expressions'}) +_class_xpath = etree.XPath("descendant-or-self::*[@class and contains(concat(' ', normalize-space(@class), ' '), concat(' ', $class_name, ' '))]") +_id_xpath = etree.XPath("descendant-or-self::*[@id=$id]") +_collect_string_content = etree.XPath("string()") +_iter_css_urls = re.compile(r'url\(('+'["][^"]*["]|'+"['][^']*[']|"+r'[^)]*)\)', re.I).finditer +_iter_css_imports = re.compile(r'@import "(.*?)"').finditer +_label_xpath = etree.XPath("//label[@for=$id]|//x:label[@for=$id]", + namespaces={'x':XHTML_NAMESPACE}) +_archive_re = re.compile(r'[^ ]+') +_parse_meta_refresh_url = re.compile( + r'[^;=]*;\s*(?:url\s*=\s*)?(?P<url>.*)$', re.I).search + + +def _unquote_match(s, pos): + if s[:1] == '"' and s[-1:] == '"' or s[:1] == "'" and s[-1:] == "'": + return s[1:-1], pos+1 + else: + return s,pos + + +def _transform_result(typ, result): + """Convert the result back into the input type. + """ + if issubclass(typ, bytes): + return tostring(result, encoding='utf-8') + elif issubclass(typ, str): + return tostring(result, encoding='unicode') + else: + return result + + +def _nons(tag): + if isinstance(tag, str): + if tag[0] == '{' and tag[1:len(XHTML_NAMESPACE)+1] == XHTML_NAMESPACE: + return tag.split('}')[-1] + return tag + + +class Classes(MutableSet): + """Provides access to an element's class attribute as a set-like collection. + Usage:: + + >>> el = fromstring('<p class="hidden large">Text</p>') + >>> classes = el.classes # or: classes = Classes(el.attrib) + >>> classes |= ['block', 'paragraph'] + >>> el.get('class') + 'hidden large block paragraph' + >>> classes.toggle('hidden') + False + >>> el.get('class') + 'large block paragraph' + >>> classes -= ('some', 'classes', 'block') + >>> el.get('class') + 'large paragraph' + """ + def __init__(self, attributes): + self._attributes = attributes + self._get_class_value = partial(attributes.get, 'class', '') + + def add(self, value): + """ + Add a class. + + This has no effect if the class is already present. + """ + if not value or re.search(r'\s', value): + raise ValueError("Invalid class name: %r" % value) + classes = self._get_class_value().split() + if value in classes: + return + classes.append(value) + self._attributes['class'] = ' '.join(classes) + + def discard(self, value): + """ + Remove a class if it is currently present. + + If the class is not present, do nothing. + """ + if not value or re.search(r'\s', value): + raise ValueError("Invalid class name: %r" % value) + classes = [name for name in self._get_class_value().split() + if name != value] + if classes: + self._attributes['class'] = ' '.join(classes) + elif 'class' in self._attributes: + del self._attributes['class'] + + def remove(self, value): + """ + Remove a class; it must currently be present. + + If the class is not present, raise a KeyError. + """ + if not value or re.search(r'\s', value): + raise ValueError("Invalid class name: %r" % value) + super().remove(value) + + def __contains__(self, name): + classes = self._get_class_value() + return name in classes and name in classes.split() + + def __iter__(self): + return iter(self._get_class_value().split()) + + def __len__(self): + return len(self._get_class_value().split()) + + # non-standard methods + + def update(self, values): + """ + Add all names from 'values'. + """ + classes = self._get_class_value().split() + extended = False + for value in values: + if value not in classes: + classes.append(value) + extended = True + if extended: + self._attributes['class'] = ' '.join(classes) + + def toggle(self, value): + """ + Add a class name if it isn't there yet, or remove it if it exists. + + Returns true if the class was added (and is now enabled) and + false if it was removed (and is now disabled). + """ + if not value or re.search(r'\s', value): + raise ValueError("Invalid class name: %r" % value) + classes = self._get_class_value().split() + try: + classes.remove(value) + enabled = False + except ValueError: + classes.append(value) + enabled = True + if classes: + self._attributes['class'] = ' '.join(classes) + else: + del self._attributes['class'] + return enabled + + +class HtmlMixin: + + def set(self, key, value=None): + """set(self, key, value=None) + + Sets an element attribute. If no value is provided, or if the value is None, + creates a 'boolean' attribute without value, e.g. "<form novalidate></form>" + for ``form.set('novalidate')``. + """ + super().set(key, value) + + @property + def classes(self): + """ + A set-like wrapper around the 'class' attribute. + """ + return Classes(self.attrib) + + @classes.setter + def classes(self, classes): + assert isinstance(classes, Classes) # only allow "el.classes |= ..." etc. + value = classes._get_class_value() + if value: + self.set('class', value) + elif self.get('class') is not None: + del self.attrib['class'] + + @property + def base_url(self): + """ + Returns the base URL, given when the page was parsed. + + Use with ``urlparse.urljoin(el.base_url, href)`` to get + absolute URLs. + """ + return self.getroottree().docinfo.URL + + @property + def forms(self): + """ + Return a list of all the forms + """ + return _forms_xpath(self) + + @property + def body(self): + """ + Return the <body> element. Can be called from a child element + to get the document's head. + """ + return self.xpath('//body|//x:body', namespaces={'x':XHTML_NAMESPACE})[0] + + @property + def head(self): + """ + Returns the <head> element. Can be called from a child + element to get the document's head. + """ + return self.xpath('//head|//x:head', namespaces={'x':XHTML_NAMESPACE})[0] + + @property + def label(self): + """ + Get or set any <label> element associated with this element. + """ + id = self.get('id') + if not id: + return None + result = _label_xpath(self, id=id) + if not result: + return None + else: + return result[0] + + @label.setter + def label(self, label): + id = self.get('id') + if not id: + raise TypeError( + "You cannot set a label for an element (%r) that has no id" + % self) + if _nons(label.tag) != 'label': + raise TypeError( + "You can only assign label to a label element (not %r)" + % label) + label.set('for', id) + + @label.deleter + def label(self): + label = self.label + if label is not None: + del label.attrib['for'] + + def drop_tree(self): + """ + Removes this element from the tree, including its children and + text. The tail text is joined to the previous element or + parent. + """ + parent = self.getparent() + assert parent is not None + if self.tail: + previous = self.getprevious() + if previous is None: + parent.text = (parent.text or '') + self.tail + else: + previous.tail = (previous.tail or '') + self.tail + parent.remove(self) + + def drop_tag(self): + """ + Remove the tag, but not its children or text. The children and text + are merged into the parent. + + Example:: + + >>> h = fragment_fromstring('<div>Hello <b>World!</b></div>') + >>> h.find('.//b').drop_tag() + >>> print(tostring(h, encoding='unicode')) + <div>Hello World!</div> + """ + parent = self.getparent() + assert parent is not None + previous = self.getprevious() + if self.text and isinstance(self.tag, str): + # not a Comment, etc. + if previous is None: + parent.text = (parent.text or '') + self.text + else: + previous.tail = (previous.tail or '') + self.text + if self.tail: + if len(self): + last = self[-1] + last.tail = (last.tail or '') + self.tail + elif previous is None: + parent.text = (parent.text or '') + self.tail + else: + previous.tail = (previous.tail or '') + self.tail + index = parent.index(self) + parent[index:index+1] = self[:] + + def find_rel_links(self, rel): + """ + Find any links like ``<a rel="{rel}">...</a>``; returns a list of elements. + """ + rel = rel.lower() + return [el for el in _rel_links_xpath(self) + if el.get('rel').lower() == rel] + + def find_class(self, class_name): + """ + Find any elements with the given class name. + """ + return _class_xpath(self, class_name=class_name) + + def get_element_by_id(self, id, *default): + """ + Get the first element in a document with the given id. If none is + found, return the default argument if provided or raise KeyError + otherwise. + + Note that there can be more than one element with the same id, + and this isn't uncommon in HTML documents found in the wild. + Browsers return only the first match, and this function does + the same. + """ + try: + # FIXME: should this check for multiple matches? + # browsers just return the first one + return _id_xpath(self, id=id)[0] + except IndexError: + if default: + return default[0] + else: + raise KeyError(id) + + def text_content(self): + """ + Return the text content of the tag (and the text in any children). + """ + return _collect_string_content(self) + + def cssselect(self, expr, translator='html'): + """ + Run the CSS expression on this element and its children, + returning a list of the results. + + Equivalent to lxml.cssselect.CSSSelect(expr, translator='html')(self) + -- note that pre-compiling the expression can provide a substantial + speedup. + """ + # Do the import here to make the dependency optional. + from lxml.cssselect import CSSSelector + return CSSSelector(expr, translator=translator)(self) + + ######################################## + ## Link functions + ######################################## + + def make_links_absolute(self, base_url=None, resolve_base_href=True, + handle_failures=None): + """ + Make all links in the document absolute, given the + ``base_url`` for the document (the full URL where the document + came from), or if no ``base_url`` is given, then the ``.base_url`` + of the document. + + If ``resolve_base_href`` is true, then any ``<base href>`` + tags in the document are used *and* removed from the document. + If it is false then any such tag is ignored. + + If ``handle_failures`` is None (default), a failure to process + a URL will abort the processing. If set to 'ignore', errors + are ignored. If set to 'discard', failing URLs will be removed. + """ + if base_url is None: + base_url = self.base_url + if base_url is None: + raise TypeError( + "No base_url given, and the document has no base_url") + if resolve_base_href: + self.resolve_base_href() + + if handle_failures == 'ignore': + def link_repl(href): + try: + return urljoin(base_url, href) + except ValueError: + return href + elif handle_failures == 'discard': + def link_repl(href): + try: + return urljoin(base_url, href) + except ValueError: + return None + elif handle_failures is None: + def link_repl(href): + return urljoin(base_url, href) + else: + raise ValueError( + "unexpected value for handle_failures: %r" % handle_failures) + + self.rewrite_links(link_repl) + + def resolve_base_href(self, handle_failures=None): + """ + Find any ``<base href>`` tag in the document, and apply its + values to all links found in the document. Also remove the + tag once it has been applied. + + If ``handle_failures`` is None (default), a failure to process + a URL will abort the processing. If set to 'ignore', errors + are ignored. If set to 'discard', failing URLs will be removed. + """ + base_href = None + basetags = self.xpath('//base[@href]|//x:base[@href]', + namespaces={'x': XHTML_NAMESPACE}) + for b in basetags: + base_href = b.get('href') + b.drop_tree() + if not base_href: + return + self.make_links_absolute(base_href, resolve_base_href=False, + handle_failures=handle_failures) + + def iterlinks(self): + """ + Yield (element, attribute, link, pos), where attribute may be None + (indicating the link is in the text). ``pos`` is the position + where the link occurs; often 0, but sometimes something else in + the case of links in stylesheets or style tags. + + Note: <base href> is *not* taken into account in any way. The + link you get is exactly the link in the document. + + Note: multiple links inside of a single text string or + attribute value are returned in reversed order. This makes it + possible to replace or delete them from the text string value + based on their reported text positions. Otherwise, a + modification at one text position can change the positions of + links reported later on. + """ + link_attrs = defs.link_attrs + for el in self.iter(etree.Element): + attribs = el.attrib + tag = _nons(el.tag) + if tag == 'object': + codebase = None + ## <object> tags have attributes that are relative to + ## codebase + if 'codebase' in attribs: + codebase = el.get('codebase') + yield (el, 'codebase', codebase, 0) + for attrib in ('classid', 'data'): + if attrib in attribs: + value = el.get(attrib) + if codebase is not None: + value = urljoin(codebase, value) + yield (el, attrib, value, 0) + if 'archive' in attribs: + for match in _archive_re.finditer(el.get('archive')): + value = match.group(0) + if codebase is not None: + value = urljoin(codebase, value) + yield (el, 'archive', value, match.start()) + else: + for attrib in link_attrs: + if attrib in attribs: + yield (el, attrib, attribs[attrib], 0) + if tag == 'meta': + http_equiv = attribs.get('http-equiv', '').lower() + if http_equiv == 'refresh': + content = attribs.get('content', '') + match = _parse_meta_refresh_url(content) + url = (match.group('url') if match else content).strip() + # unexpected content means the redirect won't work, but we might + # as well be permissive and return the entire string. + if url: + url, pos = _unquote_match( + url, match.start('url') if match else content.find(url)) + yield (el, 'content', url, pos) + elif tag == 'param': + valuetype = el.get('valuetype') or '' + if valuetype.lower() == 'ref': + ## FIXME: while it's fine we *find* this link, + ## according to the spec we aren't supposed to + ## actually change the value, including resolving + ## it. It can also still be a link, even if it + ## doesn't have a valuetype="ref" (which seems to be the norm) + ## http://www.w3.org/TR/html401/struct/objects.html#adef-valuetype + yield (el, 'value', el.get('value'), 0) + elif tag == 'style' and el.text: + urls = [ + # (start_pos, url) + _unquote_match(match.group(1), match.start(1))[::-1] + for match in _iter_css_urls(el.text) + ] + [ + (match.start(1), match.group(1)) + for match in _iter_css_imports(el.text) + ] + if urls: + # sort by start pos to bring both match sets back into order + # and reverse the list to report correct positions despite + # modifications + urls.sort(reverse=True) + for start, url in urls: + yield (el, None, url, start) + if 'style' in attribs: + urls = list(_iter_css_urls(attribs['style'])) + if urls: + # return in reversed order to simplify in-place modifications + for match in urls[::-1]: + url, start = _unquote_match(match.group(1), match.start(1)) + yield (el, 'style', url, start) + + def rewrite_links(self, link_repl_func, resolve_base_href=True, + base_href=None): + """ + Rewrite all the links in the document. For each link + ``link_repl_func(link)`` will be called, and the return value + will replace the old link. + + Note that links may not be absolute (unless you first called + ``make_links_absolute()``), and may be internal (e.g., + ``'#anchor'``). They can also be values like + ``'mailto:email'`` or ``'javascript:expr'``. + + If you give ``base_href`` then all links passed to + ``link_repl_func()`` will take that into account. + + If the ``link_repl_func`` returns None, the attribute or + tag text will be removed completely. + """ + if base_href is not None: + # FIXME: this can be done in one pass with a wrapper + # around link_repl_func + self.make_links_absolute( + base_href, resolve_base_href=resolve_base_href) + elif resolve_base_href: + self.resolve_base_href() + + for el, attrib, link, pos in self.iterlinks(): + new_link = link_repl_func(link.strip()) + if new_link == link: + continue + if new_link is None: + # Remove the attribute or element content + if attrib is None: + el.text = '' + else: + del el.attrib[attrib] + continue + + if attrib is None: + new = el.text[:pos] + new_link + el.text[pos+len(link):] + el.text = new + else: + cur = el.get(attrib) + if not pos and len(cur) == len(link): + new = new_link # most common case + else: + new = cur[:pos] + new_link + cur[pos+len(link):] + el.set(attrib, new) + + +class _MethodFunc: + """ + An object that represents a method on an element as a function; + the function takes either an element or an HTML string. It + returns whatever the function normally returns, or if the function + works in-place (and so returns None) it returns a serialized form + of the resulting document. + """ + def __init__(self, name, copy=False, source_class=HtmlMixin): + self.name = name + self.copy = copy + self.__doc__ = getattr(source_class, self.name).__doc__ + def __call__(self, doc, *args, **kw): + result_type = type(doc) + if isinstance(doc, (str, bytes)): + if 'copy' in kw: + raise TypeError( + "The keyword 'copy' can only be used with element inputs to %s, not a string input" % self.name) + doc = fromstring(doc, **kw) + else: + if 'copy' in kw: + make_a_copy = kw.pop('copy') + else: + make_a_copy = self.copy + if make_a_copy: + doc = copy.deepcopy(doc) + meth = getattr(doc, self.name) + result = meth(*args, **kw) + # FIXME: this None test is a bit sloppy + if result is None: + # Then return what we got in + return _transform_result(result_type, doc) + else: + return result + + +find_rel_links = _MethodFunc('find_rel_links', copy=False) +find_class = _MethodFunc('find_class', copy=False) +make_links_absolute = _MethodFunc('make_links_absolute', copy=True) +resolve_base_href = _MethodFunc('resolve_base_href', copy=True) +iterlinks = _MethodFunc('iterlinks', copy=False) +rewrite_links = _MethodFunc('rewrite_links', copy=True) + + +class HtmlComment(HtmlMixin, etree.CommentBase): + pass + + +class HtmlElement(HtmlMixin, etree.ElementBase): + pass + + +class HtmlProcessingInstruction(HtmlMixin, etree.PIBase): + pass + + +class HtmlEntity(HtmlMixin, etree.EntityBase): + pass + + +class HtmlElementClassLookup(etree.CustomElementClassLookup): + """A lookup scheme for HTML Element classes. + + To create a lookup instance with different Element classes, pass a tag + name mapping of Element classes in the ``classes`` keyword argument and/or + a tag name mapping of Mixin classes in the ``mixins`` keyword argument. + The special key '*' denotes a Mixin class that should be mixed into all + Element classes. + """ + _default_element_classes = {} + + def __init__(self, classes=None, mixins=None): + etree.CustomElementClassLookup.__init__(self) + if classes is None: + classes = self._default_element_classes.copy() + if mixins: + mixers = {} + for name, value in mixins: + if name == '*': + for n in classes.keys(): + mixers.setdefault(n, []).append(value) + else: + mixers.setdefault(name, []).append(value) + for name, mix_bases in mixers.items(): + cur = classes.get(name, HtmlElement) + bases = tuple(mix_bases + [cur]) + classes[name] = type(cur.__name__, bases, {}) + self._element_classes = classes + + def lookup(self, node_type, document, namespace, name): + if node_type == 'element': + return self._element_classes.get(name.lower(), HtmlElement) + elif node_type == 'comment': + return HtmlComment + elif node_type == 'PI': + return HtmlProcessingInstruction + elif node_type == 'entity': + return HtmlEntity + # Otherwise normal lookup + return None + + +################################################################################ +# parsing +################################################################################ + +_looks_like_full_html_unicode = re.compile( + r'^\s*<(?:html|!doctype)', re.I).match +_looks_like_full_html_bytes = re.compile( + br'^\s*<(?:html|!doctype)', re.I).match + + +def document_fromstring(html, parser=None, ensure_head_body=False, **kw): + if parser is None: + parser = html_parser + value = etree.fromstring(html, parser, **kw) + if value is None: + raise etree.ParserError( + "Document is empty") + if ensure_head_body and value.find('head') is None: + value.insert(0, Element('head')) + if ensure_head_body and value.find('body') is None: + value.append(Element('body')) + return value + + +def fragments_fromstring(html, no_leading_text=False, base_url=None, + parser=None, **kw): + """Parses several HTML elements, returning a list of elements. + + The first item in the list may be a string. + If no_leading_text is true, then it will be an error if there is + leading text, and it will always be a list of only elements. + + base_url will set the document's base_url attribute + (and the tree's docinfo.URL). + """ + if parser is None: + parser = html_parser + # FIXME: check what happens when you give html with a body, head, etc. + if isinstance(html, bytes): + if not _looks_like_full_html_bytes(html): + # can't use %-formatting in early Py3 versions + html = (b'<html><body>' + html + + b'</body></html>') + else: + if not _looks_like_full_html_unicode(html): + html = '<html><body>%s</body></html>' % html + doc = document_fromstring(html, parser=parser, base_url=base_url, **kw) + assert _nons(doc.tag) == 'html' + bodies = [e for e in doc if _nons(e.tag) == 'body'] + assert len(bodies) == 1, ("too many bodies: %r in %r" % (bodies, html)) + body = bodies[0] + elements = [] + if no_leading_text and body.text and body.text.strip(): + raise etree.ParserError( + "There is leading text: %r" % body.text) + if body.text and body.text.strip(): + elements.append(body.text) + elements.extend(body) + # FIXME: removing the reference to the parent artificial document + # would be nice + return elements + + +def fragment_fromstring(html, create_parent=False, base_url=None, + parser=None, **kw): + """ + Parses a single HTML element; it is an error if there is more than + one element, or if anything but whitespace precedes or follows the + element. + + If ``create_parent`` is true (or is a tag name) then a parent node + will be created to encapsulate the HTML in a single element. In this + case, leading or trailing text is also allowed, as are multiple elements + as result of the parsing. + + Passing a ``base_url`` will set the document's ``base_url`` attribute + (and the tree's docinfo.URL). + """ + if parser is None: + parser = html_parser + + accept_leading_text = bool(create_parent) + + elements = fragments_fromstring( + html, parser=parser, no_leading_text=not accept_leading_text, + base_url=base_url, **kw) + + if create_parent: + if not isinstance(create_parent, str): + create_parent = 'div' + new_root = Element(create_parent) + if elements: + if isinstance(elements[0], str): + new_root.text = elements[0] + del elements[0] + new_root.extend(elements) + return new_root + + if not elements: + raise etree.ParserError('No elements found') + if len(elements) > 1: + raise etree.ParserError( + "Multiple elements found (%s)" + % ', '.join([_element_name(e) for e in elements])) + el = elements[0] + if el.tail and el.tail.strip(): + raise etree.ParserError( + "Element followed by text: %r" % el.tail) + el.tail = None + return el + + +def fromstring(html, base_url=None, parser=None, **kw): + """ + Parse the html, returning a single element/document. + + This tries to minimally parse the chunk of text, without knowing if it + is a fragment or a document. + + base_url will set the document's base_url attribute (and the tree's docinfo.URL) + """ + if parser is None: + parser = html_parser + if isinstance(html, bytes): + is_full_html = _looks_like_full_html_bytes(html) + else: + is_full_html = _looks_like_full_html_unicode(html) + doc = document_fromstring(html, parser=parser, base_url=base_url, **kw) + if is_full_html: + return doc + # otherwise, lets parse it out... + bodies = doc.findall('body') + if not bodies: + bodies = doc.findall('{%s}body' % XHTML_NAMESPACE) + if bodies: + body = bodies[0] + if len(bodies) > 1: + # Somehow there are multiple bodies, which is bad, but just + # smash them into one body + for other_body in bodies[1:]: + if other_body.text: + if len(body): + body[-1].tail = (body[-1].tail or '') + other_body.text + else: + body.text = (body.text or '') + other_body.text + body.extend(other_body) + # We'll ignore tail + # I guess we are ignoring attributes too + other_body.drop_tree() + else: + body = None + heads = doc.findall('head') + if not heads: + heads = doc.findall('{%s}head' % XHTML_NAMESPACE) + if heads: + # Well, we have some sort of structure, so lets keep it all + head = heads[0] + if len(heads) > 1: + for other_head in heads[1:]: + head.extend(other_head) + # We don't care about text or tail in a head + other_head.drop_tree() + return doc + if body is None: + return doc + if (len(body) == 1 and (not body.text or not body.text.strip()) + and (not body[-1].tail or not body[-1].tail.strip())): + # The body has just one element, so it was probably a single + # element passed in + return body[0] + # Now we have a body which represents a bunch of tags which have the + # content that was passed in. We will create a fake container, which + # is the body tag, except <body> implies too much structure. + if _contains_block_level_tag(body): + body.tag = 'div' + else: + body.tag = 'span' + return body + + +def parse(filename_or_url, parser=None, base_url=None, **kw): + """ + Parse a filename, URL, or file-like object into an HTML document + tree. Note: this returns a tree, not an element. Use + ``parse(...).getroot()`` to get the document root. + + You can override the base URL with the ``base_url`` keyword. This + is most useful when parsing from a file-like object. + """ + if parser is None: + parser = html_parser + return etree.parse(filename_or_url, parser, base_url=base_url, **kw) + + +def _contains_block_level_tag(el): + # FIXME: I could do this with XPath, but would that just be + # unnecessarily slow? + for el in el.iter(etree.Element): + if _nons(el.tag) in defs.block_tags: + return True + return False + + +def _element_name(el): + if isinstance(el, etree.CommentBase): + return 'comment' + elif isinstance(el, str): + return 'string' + else: + return _nons(el.tag) + + +################################################################################ +# form handling +################################################################################ + +class FormElement(HtmlElement): + """ + Represents a <form> element. + """ + + @property + def inputs(self): + """ + Returns an accessor for all the input elements in the form. + + See `InputGetter` for more information about the object. + """ + return InputGetter(self) + + @property + def fields(self): + """ + Dictionary-like object that represents all the fields in this + form. You can set values in this dictionary to effect the + form. + """ + return FieldsDict(self.inputs) + + @fields.setter + def fields(self, value): + fields = self.fields + prev_keys = fields.keys() + for key, value in value.items(): + if key in prev_keys: + prev_keys.remove(key) + fields[key] = value + for key in prev_keys: + if key is None: + # Case of an unnamed input; these aren't really + # expressed in form_values() anyway. + continue + fields[key] = None + + def _name(self): + if self.get('name'): + return self.get('name') + elif self.get('id'): + return '#' + self.get('id') + iter_tags = self.body.iter + forms = list(iter_tags('form')) + if not forms: + forms = list(iter_tags('{%s}form' % XHTML_NAMESPACE)) + return str(forms.index(self)) + + def form_values(self): + """ + Return a list of tuples of the field values for the form. + This is suitable to be passed to ``urllib.urlencode()``. + """ + results = [] + for el in self.inputs: + name = el.name + if not name or 'disabled' in el.attrib: + continue + tag = _nons(el.tag) + if tag == 'textarea': + results.append((name, el.value)) + elif tag == 'select': + value = el.value + if el.multiple: + for v in value: + results.append((name, v)) + elif value is not None: + results.append((name, el.value)) + else: + assert tag == 'input', ( + "Unexpected tag: %r" % el) + if el.checkable and not el.checked: + continue + if el.type in ('submit', 'image', 'reset', 'file'): + continue + value = el.value + if value is not None: + results.append((name, el.value)) + return results + + @property + def action(self): + """ + Get/set the form's ``action`` attribute. + """ + base_url = self.base_url + action = self.get('action') + if base_url and action is not None: + return urljoin(base_url, action) + else: + return action + + @action.setter + def action(self, value): + self.set('action', value) + + @action.deleter + def action(self): + attrib = self.attrib + if 'action' in attrib: + del attrib['action'] + + @property + def method(self): + """ + Get/set the form's method. Always returns a capitalized + string, and defaults to ``'GET'`` + """ + return self.get('method', 'GET').upper() + + @method.setter + def method(self, value): + self.set('method', value.upper()) + + +HtmlElementClassLookup._default_element_classes['form'] = FormElement + + +def submit_form(form, extra_values=None, open_http=None): + """ + Helper function to submit a form. Returns a file-like object, as from + ``urllib.urlopen()``. This object also has a ``.geturl()`` function, + which shows the URL if there were any redirects. + + You can use this like:: + + form = doc.forms[0] + form.inputs['foo'].value = 'bar' # etc + response = form.submit() + doc = parse(response) + doc.make_links_absolute(response.geturl()) + + To change the HTTP requester, pass a function as ``open_http`` keyword + argument that opens the URL for you. The function must have the following + signature:: + + open_http(method, URL, values) + + The action is one of 'GET' or 'POST', the URL is the target URL as a + string, and the values are a sequence of ``(name, value)`` tuples with the + form data. + """ + values = form.form_values() + if extra_values: + if hasattr(extra_values, 'items'): + extra_values = extra_values.items() + values.extend(extra_values) + if open_http is None: + open_http = open_http_urllib + if form.action: + url = form.action + else: + url = form.base_url + return open_http(form.method, url, values) + + +def open_http_urllib(method, url, values): + if not url: + raise ValueError("cannot submit, no URL provided") + ## FIXME: should test that it's not a relative URL or something + try: + from urllib import urlencode, urlopen + except ImportError: # Python 3 + from urllib.request import urlopen + from urllib.parse import urlencode + if method == 'GET': + if '?' in url: + url += '&' + else: + url += '?' + url += urlencode(values) + data = None + else: + data = urlencode(values) + if not isinstance(data, bytes): + data = data.encode('ASCII') + return urlopen(url, data) + + +class FieldsDict(MutableMapping): + + def __init__(self, inputs): + self.inputs = inputs + def __getitem__(self, item): + return self.inputs[item].value + def __setitem__(self, item, value): + self.inputs[item].value = value + def __delitem__(self, item): + raise KeyError( + "You cannot remove keys from ElementDict") + def keys(self): + return self.inputs.keys() + def __contains__(self, item): + return item in self.inputs + def __iter__(self): + return iter(self.inputs.keys()) + def __len__(self): + return len(self.inputs) + + def __repr__(self): + return '<%s for form %s>' % ( + self.__class__.__name__, + self.inputs.form._name()) + + +class InputGetter: + + """ + An accessor that represents all the input fields in a form. + + You can get fields by name from this, with + ``form.inputs['field_name']``. If there are a set of checkboxes + with the same name, they are returned as a list (a `CheckboxGroup` + which also allows value setting). Radio inputs are handled + similarly. Use ``.keys()`` and ``.items()`` to process all fields + in this way. + + You can also iterate over this to get all input elements. This + won't return the same thing as if you get all the names, as + checkboxes and radio elements are returned individually. + """ + + def __init__(self, form): + self.form = form + + def __repr__(self): + return '<%s for form %s>' % ( + self.__class__.__name__, + self.form._name()) + + ## FIXME: there should be more methods, and it's unclear if this is + ## a dictionary-like object or list-like object + + def __getitem__(self, name): + fields = [field for field in self if field.name == name] + if not fields: + raise KeyError("No input element with the name %r" % name) + + input_type = fields[0].get('type') + if input_type == 'radio' and len(fields) > 1: + group = RadioGroup(fields) + group.name = name + return group + elif input_type == 'checkbox' and len(fields) > 1: + group = CheckboxGroup(fields) + group.name = name + return group + else: + # I don't like throwing away elements like this + return fields[0] + + def __contains__(self, name): + for field in self: + if field.name == name: + return True + return False + + def keys(self): + """ + Returns all unique field names, in document order. + + :return: A list of all unique field names. + """ + names = [] + seen = {None} + for el in self: + name = el.name + if name not in seen: + names.append(name) + seen.add(name) + return names + + def items(self): + """ + Returns all fields with their names, similar to dict.items(). + + :return: A list of (name, field) tuples. + """ + items = [] + seen = set() + for el in self: + name = el.name + if name not in seen: + seen.add(name) + items.append((name, self[name])) + return items + + def __iter__(self): + return self.form.iter('select', 'input', 'textarea') + + def __len__(self): + return sum(1 for _ in self) + + +class InputMixin: + """ + Mix-in for all input elements (input, select, and textarea) + """ + @property + def name(self): + """ + Get/set the name of the element + """ + return self.get('name') + + @name.setter + def name(self, value): + self.set('name', value) + + @name.deleter + def name(self): + attrib = self.attrib + if 'name' in attrib: + del attrib['name'] + + def __repr__(self): + type_name = getattr(self, 'type', None) + if type_name: + type_name = ' type=%r' % type_name + else: + type_name = '' + return '<%s %x name=%r%s>' % ( + self.__class__.__name__, id(self), self.name, type_name) + + +class TextareaElement(InputMixin, HtmlElement): + """ + ``<textarea>`` element. You can get the name with ``.name`` and + get/set the value with ``.value`` + """ + @property + def value(self): + """ + Get/set the value (which is the contents of this element) + """ + content = self.text or '' + if self.tag.startswith("{%s}" % XHTML_NAMESPACE): + serialisation_method = 'xml' + else: + serialisation_method = 'html' + for el in self: + # it's rare that we actually get here, so let's not use ''.join() + content += etree.tostring( + el, method=serialisation_method, encoding='unicode') + return content + + @value.setter + def value(self, value): + del self[:] + self.text = value + + @value.deleter + def value(self): + self.text = '' + del self[:] + + +HtmlElementClassLookup._default_element_classes['textarea'] = TextareaElement + + +class SelectElement(InputMixin, HtmlElement): + """ + ``<select>`` element. You can get the name with ``.name``. + + ``.value`` will be the value of the selected option, unless this + is a multi-select element (``<select multiple>``), in which case + it will be a set-like object. In either case ``.value_options`` + gives the possible values. + + The boolean attribute ``.multiple`` shows if this is a + multi-select. + """ + @property + def value(self): + """ + Get/set the value of this select (the selected option). + + If this is a multi-select, this is a set-like object that + represents all the selected options. + """ + if self.multiple: + return MultipleSelectOptions(self) + options = _options_xpath(self) + + try: + selected_option = next(el for el in reversed(options) if el.get('selected') is not None) + except StopIteration: + try: + selected_option = next(el for el in options if el.get('disabled') is None) + except StopIteration: + return None + value = selected_option.get('value') + if value is None: + value = (selected_option.text or '').strip() + return value + + @value.setter + def value(self, value): + if self.multiple: + if isinstance(value, str): + raise TypeError("You must pass in a sequence") + values = self.value + values.clear() + values.update(value) + return + checked_option = None + if value is not None: + for el in _options_xpath(self): + opt_value = el.get('value') + if opt_value is None: + opt_value = (el.text or '').strip() + if opt_value == value: + checked_option = el + break + else: + raise ValueError( + "There is no option with the value of %r" % value) + for el in _options_xpath(self): + if 'selected' in el.attrib: + del el.attrib['selected'] + if checked_option is not None: + checked_option.set('selected', '') + + @value.deleter + def value(self): + # FIXME: should del be allowed at all? + if self.multiple: + self.value.clear() + else: + self.value = None + + @property + def value_options(self): + """ + All the possible values this select can have (the ``value`` + attribute of all the ``<option>`` elements. + """ + options = [] + for el in _options_xpath(self): + value = el.get('value') + if value is None: + value = (el.text or '').strip() + options.append(value) + return options + + @property + def multiple(self): + """ + Boolean attribute: is there a ``multiple`` attribute on this element. + """ + return 'multiple' in self.attrib + + @multiple.setter + def multiple(self, value): + if value: + self.set('multiple', '') + elif 'multiple' in self.attrib: + del self.attrib['multiple'] + + +HtmlElementClassLookup._default_element_classes['select'] = SelectElement + + +class MultipleSelectOptions(SetMixin): + """ + Represents all the selected options in a ``<select multiple>`` element. + + You can add to this set-like option to select an option, or remove + to unselect the option. + """ + + def __init__(self, select): + self.select = select + + @property + def options(self): + """ + Iterator of all the ``<option>`` elements. + """ + return iter(_options_xpath(self.select)) + + def __iter__(self): + for option in self.options: + if 'selected' in option.attrib: + opt_value = option.get('value') + if opt_value is None: + opt_value = (option.text or '').strip() + yield opt_value + + def add(self, item): + for option in self.options: + opt_value = option.get('value') + if opt_value is None: + opt_value = (option.text or '').strip() + if opt_value == item: + option.set('selected', '') + break + else: + raise ValueError( + "There is no option with the value %r" % item) + + def remove(self, item): + for option in self.options: + opt_value = option.get('value') + if opt_value is None: + opt_value = (option.text or '').strip() + if opt_value == item: + if 'selected' in option.attrib: + del option.attrib['selected'] + else: + raise ValueError( + "The option %r is not currently selected" % item) + break + else: + raise ValueError( + "There is not option with the value %r" % item) + + def __repr__(self): + return '<%s {%s} for select name=%r>' % ( + self.__class__.__name__, + ', '.join([repr(v) for v in self]), + self.select.name) + + +class RadioGroup(list): + """ + This object represents several ``<input type=radio>`` elements + that have the same name. + + You can use this like a list, but also use the property + ``.value`` to check/uncheck inputs. Also you can use + ``.value_options`` to get the possible values. + """ + @property + def value(self): + """ + Get/set the value, which checks the radio with that value (and + unchecks any other value). + """ + for el in self: + if 'checked' in el.attrib: + return el.get('value') + return None + + @value.setter + def value(self, value): + checked_option = None + if value is not None: + for el in self: + if el.get('value') == value: + checked_option = el + break + else: + raise ValueError("There is no radio input with the value %r" % value) + for el in self: + if 'checked' in el.attrib: + del el.attrib['checked'] + if checked_option is not None: + checked_option.set('checked', '') + + @value.deleter + def value(self): + self.value = None + + @property + def value_options(self): + """ + Returns a list of all the possible values. + """ + return [el.get('value') for el in self] + + def __repr__(self): + return '%s(%s)' % ( + self.__class__.__name__, + list.__repr__(self)) + + +class CheckboxGroup(list): + """ + Represents a group of checkboxes (``<input type=checkbox>``) that + have the same name. + + In addition to using this like a list, the ``.value`` attribute + returns a set-like object that you can add to or remove from to + check and uncheck checkboxes. You can also use ``.value_options`` + to get the possible values. + """ + @property + def value(self): + """ + Return a set-like object that can be modified to check or + uncheck individual checkboxes according to their value. + """ + return CheckboxValues(self) + + @value.setter + def value(self, value): + values = self.value + values.clear() + if not hasattr(value, '__iter__'): + raise ValueError( + "A CheckboxGroup (name=%r) must be set to a sequence (not %r)" + % (self[0].name, value)) + values.update(value) + + @value.deleter + def value(self): + self.value.clear() + + @property + def value_options(self): + """ + Returns a list of all the possible values. + """ + return [el.get('value') for el in self] + + def __repr__(self): + return '%s(%s)' % ( + self.__class__.__name__, list.__repr__(self)) + + +class CheckboxValues(SetMixin): + """ + Represents the values of the checked checkboxes in a group of + checkboxes with the same name. + """ + + def __init__(self, group): + self.group = group + + def __iter__(self): + return iter([ + el.get('value') + for el in self.group + if 'checked' in el.attrib]) + + def add(self, value): + for el in self.group: + if el.get('value') == value: + el.set('checked', '') + break + else: + raise KeyError("No checkbox with value %r" % value) + + def remove(self, value): + for el in self.group: + if el.get('value') == value: + if 'checked' in el.attrib: + del el.attrib['checked'] + else: + raise KeyError( + "The checkbox with value %r was already unchecked" % value) + break + else: + raise KeyError( + "No checkbox with value %r" % value) + + def __repr__(self): + return '<%s {%s} for checkboxes name=%r>' % ( + self.__class__.__name__, + ', '.join([repr(v) for v in self]), + self.group.name) + + +class InputElement(InputMixin, HtmlElement): + """ + Represents an ``<input>`` element. + + You can get the type with ``.type`` (which is lower-cased and + defaults to ``'text'``). + + Also you can get and set the value with ``.value`` + + Checkboxes and radios have the attribute ``input.checkable == + True`` (for all others it is false) and a boolean attribute + ``.checked``. + + """ + + ## FIXME: I'm a little uncomfortable with the use of .checked + @property + def value(self): + """ + Get/set the value of this element, using the ``value`` attribute. + + Also, if this is a checkbox and it has no value, this defaults + to ``'on'``. If it is a checkbox or radio that is not + checked, this returns None. + """ + if self.checkable: + if self.checked: + return self.get('value') or 'on' + else: + return None + return self.get('value') + + @value.setter + def value(self, value): + if self.checkable: + if not value: + self.checked = False + else: + self.checked = True + if isinstance(value, str): + self.set('value', value) + else: + self.set('value', value) + + @value.deleter + def value(self): + if self.checkable: + self.checked = False + else: + if 'value' in self.attrib: + del self.attrib['value'] + + @property + def type(self): + """ + Return the type of this element (using the type attribute). + """ + return self.get('type', 'text').lower() + + @type.setter + def type(self, value): + self.set('type', value) + + @property + def checkable(self): + """ + Boolean: can this element be checked? + """ + return self.type in ('checkbox', 'radio') + + @property + def checked(self): + """ + Boolean attribute to get/set the presence of the ``checked`` + attribute. + + You can only use this on checkable input types. + """ + if not self.checkable: + raise AttributeError('Not a checkable input type') + return 'checked' in self.attrib + + @checked.setter + def checked(self, value): + if not self.checkable: + raise AttributeError('Not a checkable input type') + if value: + self.set('checked', '') + else: + attrib = self.attrib + if 'checked' in attrib: + del attrib['checked'] + + +HtmlElementClassLookup._default_element_classes['input'] = InputElement + + +class LabelElement(HtmlElement): + """ + Represents a ``<label>`` element. + + Label elements are linked to other elements with their ``for`` + attribute. You can access this element with ``label.for_element``. + """ + @property + def for_element(self): + """ + Get/set the element this label points to. Return None if it + can't be found. + """ + id = self.get('for') + if not id: + return None + return self.body.get_element_by_id(id) + + @for_element.setter + def for_element(self, other): + id = other.get('id') + if not id: + raise TypeError( + "Element %r has no id attribute" % other) + self.set('for', id) + + @for_element.deleter + def for_element(self): + attrib = self.attrib + if 'id' in attrib: + del attrib['id'] + + +HtmlElementClassLookup._default_element_classes['label'] = LabelElement + + +############################################################ +## Serialization +############################################################ + +def html_to_xhtml(html): + """Convert all tags in an HTML tree to XHTML by moving them to the + XHTML namespace. + """ + try: + html = html.getroot() + except AttributeError: + pass + prefix = "{%s}" % XHTML_NAMESPACE + for el in html.iter(etree.Element): + tag = el.tag + if tag[0] != '{': + el.tag = prefix + tag + + +def xhtml_to_html(xhtml): + """Convert all tags in an XHTML tree to HTML by removing their + XHTML namespace. + """ + try: + xhtml = xhtml.getroot() + except AttributeError: + pass + prefix = "{%s}" % XHTML_NAMESPACE + prefix_len = len(prefix) + for el in xhtml.iter(prefix + "*"): + el.tag = el.tag[prefix_len:] + + +# This isn't a general match, but it's a match for what libxml2 +# specifically serialises: +__str_replace_meta_content_type = re.compile( + r'<meta http-equiv="Content-Type"[^>]*>').sub +__bytes_replace_meta_content_type = re.compile( + br'<meta http-equiv="Content-Type"[^>]*>').sub + + +def tostring(doc, pretty_print=False, include_meta_content_type=False, + encoding=None, method="html", with_tail=True, doctype=None): + """Return an HTML string representation of the document. + + Note: if include_meta_content_type is true this will create a + ``<meta http-equiv="Content-Type" ...>`` tag in the head; + regardless of the value of include_meta_content_type any existing + ``<meta http-equiv="Content-Type" ...>`` tag will be removed + + The ``encoding`` argument controls the output encoding (defaults to + ASCII, with &#...; character references for any characters outside + of ASCII). Note that you can pass the name ``'unicode'`` as + ``encoding`` argument to serialise to a Unicode string. + + The ``method`` argument defines the output method. It defaults to + 'html', but can also be 'xml' for xhtml output, or 'text' to + serialise to plain text without markup. + + To leave out the tail text of the top-level element that is being + serialised, pass ``with_tail=False``. + + The ``doctype`` option allows passing in a plain string that will + be serialised before the XML tree. Note that passing in non + well-formed content here will make the XML output non well-formed. + Also, an existing doctype in the document tree will not be removed + when serialising an ElementTree instance. + + Example:: + + >>> from lxml import html + >>> root = html.fragment_fromstring('<p>Hello<br>world!</p>') + + >>> html.tostring(root) + b'<p>Hello<br>world!</p>' + >>> html.tostring(root, method='html') + b'<p>Hello<br>world!</p>' + + >>> html.tostring(root, method='xml') + b'<p>Hello<br/>world!</p>' + + >>> html.tostring(root, method='text') + b'Helloworld!' + + >>> html.tostring(root, method='text', encoding='unicode') + u'Helloworld!' + + >>> root = html.fragment_fromstring('<div><p>Hello<br>world!</p>TAIL</div>') + >>> html.tostring(root[0], method='text', encoding='unicode') + u'Helloworld!TAIL' + + >>> html.tostring(root[0], method='text', encoding='unicode', with_tail=False) + u'Helloworld!' + + >>> doc = html.document_fromstring('<p>Hello<br>world!</p>') + >>> html.tostring(doc, method='html', encoding='unicode') + u'<html><body><p>Hello<br>world!</p></body></html>' + + >>> print(html.tostring(doc, method='html', encoding='unicode', + ... doctype='<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"' + ... ' "http://www.w3.org/TR/html4/strict.dtd">')) + <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd"> + <html><body><p>Hello<br>world!</p></body></html> + """ + html = etree.tostring(doc, method=method, pretty_print=pretty_print, + encoding=encoding, with_tail=with_tail, + doctype=doctype) + if method == 'html' and not include_meta_content_type: + if isinstance(html, str): + html = __str_replace_meta_content_type('', html) + else: + html = __bytes_replace_meta_content_type(b'', html) + return html + + +tostring.__doc__ = __fix_docstring(tostring.__doc__) + + +def open_in_browser(doc, encoding=None): + """ + Open the HTML document in a web browser, saving it to a temporary + file to open it. Note that this does not delete the file after + use. This is mainly meant for debugging. + """ + import os + import webbrowser + import tempfile + if not isinstance(doc, etree._ElementTree): + doc = etree.ElementTree(doc) + handle, fn = tempfile.mkstemp(suffix='.html') + f = os.fdopen(handle, 'wb') + try: + doc.write(f, method="html", encoding=encoding or doc.docinfo.encoding or "UTF-8") + finally: + # we leak the file itself here, but we should at least close it + f.close() + url = 'file://' + fn.replace(os.path.sep, '/') + print(url) + webbrowser.open(url) + + +################################################################################ +# configure Element class lookup +################################################################################ + +class HTMLParser(etree.HTMLParser): + """An HTML parser that is configured to return lxml.html Element + objects. + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_element_class_lookup(HtmlElementClassLookup()) + + +class XHTMLParser(etree.XMLParser): + """An XML parser that is configured to return lxml.html Element + objects. + + Note that this parser is not really XHTML aware unless you let it + load a DTD that declares the HTML entities. To do this, make sure + you have the XHTML DTDs installed in your catalogs, and create the + parser like this:: + + >>> parser = XHTMLParser(load_dtd=True) + + If you additionally want to validate the document, use this:: + + >>> parser = XHTMLParser(dtd_validation=True) + + For catalog support, see http://www.xmlsoft.org/catalog.html. + """ + def __init__(self, **kwargs): + super().__init__(**kwargs) + self.set_element_class_lookup(HtmlElementClassLookup()) + + +def Element(*args, **kw): + """Create a new HTML Element. + + This can also be used for XHTML documents. + """ + v = html_parser.makeelement(*args, **kw) + return v + + +html_parser = HTMLParser() +xhtml_parser = XHTMLParser() diff --git a/.venv/lib/python3.12/site-packages/lxml/html/_diffcommand.py b/.venv/lib/python3.12/site-packages/lxml/html/_diffcommand.py new file mode 100644 index 00000000..b045a2b1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/_diffcommand.py @@ -0,0 +1,86 @@ +import optparse +import sys +import re +import os +from .diff import htmldiff + +description = """\ +""" + +parser = optparse.OptionParser( + usage="%prog [OPTIONS] FILE1 FILE2\n" + "%prog --annotate [OPTIONS] INFO1 FILE1 INFO2 FILE2 ...", + description=description, + ) + +parser.add_option( + '-o', '--output', + metavar="FILE", + dest="output", + default="-", + help="File to write the difference to", + ) + +parser.add_option( + '-a', '--annotation', + action="store_true", + dest="annotation", + help="Do an annotation") + +def main(args=None): + if args is None: + args = sys.argv[1:] + options, args = parser.parse_args(args) + if options.annotation: + return annotate(options, args) + if len(args) != 2: + print('Error: you must give two files') + parser.print_help() + sys.exit(1) + file1, file2 = args + input1 = read_file(file1) + input2 = read_file(file2) + body1 = split_body(input1)[1] + pre, body2, post = split_body(input2) + result = htmldiff(body1, body2) + result = pre + result + post + if options.output == '-': + if not result.endswith('\n'): + result += '\n' + sys.stdout.write(result) + else: + with open(options.output, 'wb') as f: + f.write(result) + +def read_file(filename): + if filename == '-': + c = sys.stdin.read() + elif not os.path.exists(filename): + raise OSError( + "Input file %s does not exist" % filename) + else: + with open(filename, 'rb') as f: + c = f.read() + return c + +body_start_re = re.compile( + r"<body.*?>", re.I|re.S) +body_end_re = re.compile( + r"</body.*?>", re.I|re.S) + +def split_body(html): + pre = post = '' + match = body_start_re.search(html) + if match: + pre = html[:match.end()] + html = html[match.end():] + match = body_end_re.search(html) + if match: + post = html[match.start():] + html = html[:match.start()] + return pre, html, post + +def annotate(options, args): + print("Not yet implemented") + sys.exit(1) + diff --git a/.venv/lib/python3.12/site-packages/lxml/html/_html5builder.py b/.venv/lib/python3.12/site-packages/lxml/html/_html5builder.py new file mode 100644 index 00000000..a88ed944 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/_html5builder.py @@ -0,0 +1,100 @@ +""" +Legacy module - don't use in new code! + +html5lib now has its own proper implementation. + +This module implements a tree builder for html5lib that generates lxml +html element trees. This module uses camelCase as it follows the +html5lib style guide. +""" + +from html5lib.treebuilders import _base, etree as etree_builders +from lxml import html, etree + + +class DocumentType: + + def __init__(self, name, publicId, systemId): + self.name = name + self.publicId = publicId + self.systemId = systemId + +class Document: + + def __init__(self): + self._elementTree = None + self.childNodes = [] + + def appendChild(self, element): + self._elementTree.getroot().addnext(element._element) + + +class TreeBuilder(_base.TreeBuilder): + documentClass = Document + doctypeClass = DocumentType + elementClass = None + commentClass = None + fragmentClass = Document + + def __init__(self, *args, **kwargs): + html_builder = etree_builders.getETreeModule(html, fullTree=False) + etree_builder = etree_builders.getETreeModule(etree, fullTree=False) + self.elementClass = html_builder.Element + self.commentClass = etree_builder.Comment + _base.TreeBuilder.__init__(self, *args, **kwargs) + + def reset(self): + _base.TreeBuilder.reset(self) + self.rootInserted = False + self.initialComments = [] + self.doctype = None + + def getDocument(self): + return self.document._elementTree + + def getFragment(self): + fragment = [] + element = self.openElements[0]._element + if element.text: + fragment.append(element.text) + fragment.extend(element.getchildren()) + if element.tail: + fragment.append(element.tail) + return fragment + + def insertDoctype(self, name, publicId, systemId): + doctype = self.doctypeClass(name, publicId, systemId) + self.doctype = doctype + + def insertComment(self, data, parent=None): + if not self.rootInserted: + self.initialComments.append(data) + else: + _base.TreeBuilder.insertComment(self, data, parent) + + def insertRoot(self, name): + buf = [] + if self.doctype and self.doctype.name: + buf.append('<!DOCTYPE %s' % self.doctype.name) + if self.doctype.publicId is not None or self.doctype.systemId is not None: + buf.append(' PUBLIC "%s" "%s"' % (self.doctype.publicId, + self.doctype.systemId)) + buf.append('>') + buf.append('<html></html>') + root = html.fromstring(''.join(buf)) + + # Append the initial comments: + for comment in self.initialComments: + root.addprevious(etree.Comment(comment)) + + # Create the root document and add the ElementTree to it + self.document = self.documentClass() + self.document._elementTree = root.getroottree() + + # Add the root element to the internal child/open data structures + root_element = self.elementClass(name) + root_element._element = root + self.document.childNodes.append(root_element) + self.openElements.append(root_element) + + self.rootInserted = True diff --git a/.venv/lib/python3.12/site-packages/lxml/html/_setmixin.py b/.venv/lib/python3.12/site-packages/lxml/html/_setmixin.py new file mode 100644 index 00000000..0be2bac4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/_setmixin.py @@ -0,0 +1,56 @@ +try: + from collections.abc import MutableSet +except ImportError: + from collections.abc import MutableSet + + +class SetMixin(MutableSet): + + """ + Mix-in for sets. You must define __iter__, add, remove + """ + + def __len__(self): + length = 0 + for item in self: + length += 1 + return length + + def __contains__(self, item): + for has_item in self: + if item == has_item: + return True + return False + + issubset = MutableSet.__le__ + issuperset = MutableSet.__ge__ + + union = MutableSet.__or__ + intersection = MutableSet.__and__ + difference = MutableSet.__sub__ + symmetric_difference = MutableSet.__xor__ + + def copy(self): + return set(self) + + def update(self, other): + self |= other + + def intersection_update(self, other): + self &= other + + def difference_update(self, other): + self -= other + + def symmetric_difference_update(self, other): + self ^= other + + def discard(self, item): + try: + self.remove(item) + except KeyError: + pass + + @classmethod + def _from_iterable(cls, it): + return set(it) diff --git a/.venv/lib/python3.12/site-packages/lxml/html/builder.py b/.venv/lib/python3.12/site-packages/lxml/html/builder.py new file mode 100644 index 00000000..8a074ecf --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/builder.py @@ -0,0 +1,133 @@ +# -------------------------------------------------------------------- +# The ElementTree toolkit is +# Copyright (c) 1999-2004 by Fredrik Lundh +# -------------------------------------------------------------------- + +""" +A set of HTML generator tags for building HTML documents. + +Usage:: + + >>> from lxml.html.builder import * + >>> html = HTML( + ... HEAD( TITLE("Hello World") ), + ... BODY( CLASS("main"), + ... H1("Hello World !") + ... ) + ... ) + + >>> import lxml.etree + >>> print lxml.etree.tostring(html, pretty_print=True) + <html> + <head> + <title>Hello World</title> + </head> + <body class="main"> + <h1>Hello World !</h1> + </body> + </html> + +""" + +from lxml.builder import ElementMaker +from lxml.html import html_parser + +E = ElementMaker(makeelement=html_parser.makeelement) + +# elements +A = E.a #: anchor +ABBR = E.abbr #: abbreviated form (e.g., WWW, HTTP, etc.) +ACRONYM = E.acronym #: +ADDRESS = E.address #: information on author +APPLET = E.applet #: Java applet (DEPRECATED) +AREA = E.area #: client-side image map area +B = E.b #: bold text style +BASE = E.base #: document base URI +BASEFONT = E.basefont #: base font size (DEPRECATED) +BDO = E.bdo #: I18N BiDi over-ride +BIG = E.big #: large text style +BLOCKQUOTE = E.blockquote #: long quotation +BODY = E.body #: document body +BR = E.br #: forced line break +BUTTON = E.button #: push button +CAPTION = E.caption #: table caption +CENTER = E.center #: shorthand for DIV align=center (DEPRECATED) +CITE = E.cite #: citation +CODE = E.code #: computer code fragment +COL = E.col #: table column +COLGROUP = E.colgroup #: table column group +DD = E.dd #: definition description +DEL = getattr(E, 'del') #: deleted text +DFN = E.dfn #: instance definition +DIR = E.dir #: directory list (DEPRECATED) +DIV = E.div #: generic language/style container +DL = E.dl #: definition list +DT = E.dt #: definition term +EM = E.em #: emphasis +FIELDSET = E.fieldset #: form control group +FONT = E.font #: local change to font (DEPRECATED) +FORM = E.form #: interactive form +FRAME = E.frame #: subwindow +FRAMESET = E.frameset #: window subdivision +H1 = E.h1 #: heading +H2 = E.h2 #: heading +H3 = E.h3 #: heading +H4 = E.h4 #: heading +H5 = E.h5 #: heading +H6 = E.h6 #: heading +HEAD = E.head #: document head +HR = E.hr #: horizontal rule +HTML = E.html #: document root element +I = E.i #: italic text style +IFRAME = E.iframe #: inline subwindow +IMG = E.img #: Embedded image +INPUT = E.input #: form control +INS = E.ins #: inserted text +ISINDEX = E.isindex #: single line prompt (DEPRECATED) +KBD = E.kbd #: text to be entered by the user +LABEL = E.label #: form field label text +LEGEND = E.legend #: fieldset legend +LI = E.li #: list item +LINK = E.link #: a media-independent link +MAP = E.map #: client-side image map +MENU = E.menu #: menu list (DEPRECATED) +META = E.meta #: generic metainformation +NOFRAMES = E.noframes #: alternate content container for non frame-based rendering +NOSCRIPT = E.noscript #: alternate content container for non script-based rendering +OBJECT = E.object #: generic embedded object +OL = E.ol #: ordered list +OPTGROUP = E.optgroup #: option group +OPTION = E.option #: selectable choice +P = E.p #: paragraph +PARAM = E.param #: named property value +PRE = E.pre #: preformatted text +Q = E.q #: short inline quotation +S = E.s #: strike-through text style (DEPRECATED) +SAMP = E.samp #: sample program output, scripts, etc. +SCRIPT = E.script #: script statements +SELECT = E.select #: option selector +SMALL = E.small #: small text style +SPAN = E.span #: generic language/style container +STRIKE = E.strike #: strike-through text (DEPRECATED) +STRONG = E.strong #: strong emphasis +STYLE = E.style #: style info +SUB = E.sub #: subscript +SUP = E.sup #: superscript +TABLE = E.table #: +TBODY = E.tbody #: table body +TD = E.td #: table data cell +TEXTAREA = E.textarea #: multi-line text field +TFOOT = E.tfoot #: table footer +TH = E.th #: table header cell +THEAD = E.thead #: table header +TITLE = E.title #: document title +TR = E.tr #: table row +TT = E.tt #: teletype or monospaced text style +U = E.u #: underlined text style (DEPRECATED) +UL = E.ul #: unordered list +VAR = E.var #: instance of a variable or program argument + +# attributes (only reserved words are included here) +ATTR = dict +def CLASS(v): return {'class': v} +def FOR(v): return {'for': v} diff --git a/.venv/lib/python3.12/site-packages/lxml/html/clean.py b/.venv/lib/python3.12/site-packages/lxml/html/clean.py new file mode 100644 index 00000000..d4b9e96d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/clean.py @@ -0,0 +1,21 @@ +# cython: language_level=3str + +"""Backward-compatibility module for lxml_html_clean""" + +try: + from lxml_html_clean import * + + __all__ = [ + "clean_html", + "clean", + "Cleaner", + "autolink", + "autolink_html", + "word_break", + "word_break_html", + ] +except ImportError: + raise ImportError( + "lxml.html.clean module is now a separate project lxml_html_clean.\n" + "Install lxml[html_clean] or lxml_html_clean directly." + ) from None diff --git a/.venv/lib/python3.12/site-packages/lxml/html/defs.py b/.venv/lib/python3.12/site-packages/lxml/html/defs.py new file mode 100644 index 00000000..2058ea33 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/defs.py @@ -0,0 +1,135 @@ +# FIXME: this should all be confirmed against what a DTD says +# (probably in a test; this may not match the DTD exactly, but we +# should document just how it differs). + +""" +Data taken from https://www.w3.org/TR/html401/index/elements.html +and https://www.w3.org/community/webed/wiki/HTML/New_HTML5_Elements +for html5_tags. +""" + +empty_tags = frozenset([ + 'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr', + 'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track']) + +deprecated_tags = frozenset([ + 'applet', 'basefont', 'center', 'dir', 'font', 'isindex', + 'menu', 's', 'strike', 'u']) + +# archive actually takes a space-separated list of URIs +link_attrs = frozenset([ + 'action', 'archive', 'background', 'cite', 'classid', + 'codebase', 'data', 'href', 'longdesc', 'profile', 'src', + 'usemap', + # Not standard: + 'dynsrc', 'lowsrc', + # HTML5 formaction + 'formaction' + ]) + +# Not in the HTML 4 spec: +# onerror, onresize +event_attrs = frozenset([ + 'onblur', 'onchange', 'onclick', 'ondblclick', 'onerror', + 'onfocus', 'onkeydown', 'onkeypress', 'onkeyup', 'onload', + 'onmousedown', 'onmousemove', 'onmouseout', 'onmouseover', + 'onmouseup', 'onreset', 'onresize', 'onselect', 'onsubmit', + 'onunload', + ]) + +safe_attrs = frozenset([ + 'abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', + 'alt', 'axis', 'border', 'cellpadding', 'cellspacing', 'char', 'charoff', + 'charset', 'checked', 'cite', 'class', 'clear', 'cols', 'colspan', + 'color', 'compact', 'coords', 'datetime', 'dir', 'disabled', 'enctype', + 'for', 'frame', 'headers', 'height', 'href', 'hreflang', 'hspace', 'id', + 'ismap', 'label', 'lang', 'longdesc', 'maxlength', 'media', 'method', + 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'prompt', 'readonly', + 'rel', 'rev', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', + 'size', 'span', 'src', 'start', 'summary', 'tabindex', 'target', 'title', + 'type', 'usemap', 'valign', 'value', 'vspace', 'width']) + +# From http://htmlhelp.com/reference/html40/olist.html +top_level_tags = frozenset([ + 'html', 'head', 'body', 'frameset', + ]) + +head_tags = frozenset([ + 'base', 'isindex', 'link', 'meta', 'script', 'style', 'title', + ]) + +general_block_tags = frozenset([ + 'address', + 'blockquote', + 'center', + 'del', + 'div', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'hr', + 'ins', + 'isindex', + 'noscript', + 'p', + 'pre', + ]) + +list_tags = frozenset([ + 'dir', 'dl', 'dt', 'dd', 'li', 'menu', 'ol', 'ul', + ]) + +table_tags = frozenset([ + 'table', 'caption', 'colgroup', 'col', + 'thead', 'tfoot', 'tbody', 'tr', 'td', 'th', + ]) + +# just this one from +# http://www.georgehernandez.com/h/XComputers/HTML/2BlockLevel.htm +block_tags = general_block_tags | list_tags | table_tags | frozenset([ + # Partial form tags + 'fieldset', 'form', 'legend', 'optgroup', 'option', + ]) + +form_tags = frozenset([ + 'form', 'button', 'fieldset', 'legend', 'input', 'label', + 'select', 'optgroup', 'option', 'textarea', + ]) + +special_inline_tags = frozenset([ + 'a', 'applet', 'basefont', 'bdo', 'br', 'embed', 'font', 'iframe', + 'img', 'map', 'area', 'object', 'param', 'q', 'script', + 'span', 'sub', 'sup', + ]) + +phrase_tags = frozenset([ + 'abbr', 'acronym', 'cite', 'code', 'del', 'dfn', 'em', + 'ins', 'kbd', 'samp', 'strong', 'var', + ]) + +font_style_tags = frozenset([ + 'b', 'big', 'i', 's', 'small', 'strike', 'tt', 'u', + ]) + +frame_tags = frozenset([ + 'frameset', 'frame', 'noframes', + ]) + +html5_tags = frozenset([ + 'article', 'aside', 'audio', 'canvas', 'command', 'datalist', + 'details', 'embed', 'figcaption', 'figure', 'footer', 'header', + 'hgroup', 'keygen', 'mark', 'math', 'meter', 'nav', 'output', + 'progress', 'rp', 'rt', 'ruby', 'section', 'source', 'summary', + 'svg', 'time', 'track', 'video', 'wbr' + ]) + +# These tags aren't standard +nonstandard_tags = frozenset(['blink', 'marquee']) + + +tags = (top_level_tags | head_tags | general_block_tags | list_tags + | table_tags | form_tags | special_inline_tags | phrase_tags + | font_style_tags | nonstandard_tags | html5_tags) diff --git a/.venv/lib/python3.12/site-packages/lxml/html/diff.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/lxml/html/diff.cpython-312-x86_64-linux-gnu.so Binary files differnew file mode 100755 index 00000000..d43b3fef --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/diff.cpython-312-x86_64-linux-gnu.so diff --git a/.venv/lib/python3.12/site-packages/lxml/html/diff.py b/.venv/lib/python3.12/site-packages/lxml/html/diff.py new file mode 100644 index 00000000..56d28057 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/diff.py @@ -0,0 +1,878 @@ +# cython: language_level=3 + + +import difflib +from lxml import etree +from lxml.html import fragment_fromstring +import re + +__all__ = ['html_annotate', 'htmldiff'] + +try: + from html import escape as html_escape +except ImportError: + from cgi import escape as html_escape +try: + _unicode = unicode +except NameError: + # Python 3 + _unicode = str +try: + basestring +except NameError: + # Python 3 + basestring = str + +############################################################ +## Annotation +############################################################ + +def default_markup(text, version): + return '<span title="%s">%s</span>' % ( + html_escape(_unicode(version), 1), text) + +def html_annotate(doclist, markup=default_markup): + """ + doclist should be ordered from oldest to newest, like:: + + >>> version1 = 'Hello World' + >>> version2 = 'Goodbye World' + >>> print(html_annotate([(version1, 'version 1'), + ... (version2, 'version 2')])) + <span title="version 2">Goodbye</span> <span title="version 1">World</span> + + The documents must be *fragments* (str/UTF8 or unicode), not + complete documents + + The markup argument is a function to markup the spans of words. + This function is called like markup('Hello', 'version 2'), and + returns HTML. The first argument is text and never includes any + markup. The default uses a span with a title: + + >>> print(default_markup('Some Text', 'by Joe')) + <span title="by Joe">Some Text</span> + """ + # The basic strategy we have is to split the documents up into + # logical tokens (which are words with attached markup). We then + # do diffs of each of the versions to track when a token first + # appeared in the document; the annotation attached to the token + # is the version where it first appeared. + tokenlist = [tokenize_annotated(doc, version) + for doc, version in doclist] + cur_tokens = tokenlist[0] + for tokens in tokenlist[1:]: + html_annotate_merge_annotations(cur_tokens, tokens) + cur_tokens = tokens + + # After we've tracked all the tokens, we can combine spans of text + # that are adjacent and have the same annotation + cur_tokens = compress_tokens(cur_tokens) + # And finally add markup + result = markup_serialize_tokens(cur_tokens, markup) + return ''.join(result).strip() + +def tokenize_annotated(doc, annotation): + """Tokenize a document and add an annotation attribute to each token + """ + tokens = tokenize(doc, include_hrefs=False) + for tok in tokens: + tok.annotation = annotation + return tokens + +def html_annotate_merge_annotations(tokens_old, tokens_new): + """Merge the annotations from tokens_old into tokens_new, when the + tokens in the new document already existed in the old document. + """ + s = InsensitiveSequenceMatcher(a=tokens_old, b=tokens_new) + commands = s.get_opcodes() + + for command, i1, i2, j1, j2 in commands: + if command == 'equal': + eq_old = tokens_old[i1:i2] + eq_new = tokens_new[j1:j2] + copy_annotations(eq_old, eq_new) + +def copy_annotations(src, dest): + """ + Copy annotations from the tokens listed in src to the tokens in dest + """ + assert len(src) == len(dest) + for src_tok, dest_tok in zip(src, dest): + dest_tok.annotation = src_tok.annotation + +def compress_tokens(tokens): + """ + Combine adjacent tokens when there is no HTML between the tokens, + and they share an annotation + """ + result = [tokens[0]] + for tok in tokens[1:]: + if (not result[-1].post_tags and + not tok.pre_tags and + result[-1].annotation == tok.annotation): + compress_merge_back(result, tok) + else: + result.append(tok) + return result + +def compress_merge_back(tokens, tok): + """ Merge tok into the last element of tokens (modifying the list of + tokens in-place). """ + last = tokens[-1] + if type(last) is not token or type(tok) is not token: + tokens.append(tok) + else: + text = _unicode(last) + if last.trailing_whitespace: + text += last.trailing_whitespace + text += tok + merged = token(text, + pre_tags=last.pre_tags, + post_tags=tok.post_tags, + trailing_whitespace=tok.trailing_whitespace) + merged.annotation = last.annotation + tokens[-1] = merged + +def markup_serialize_tokens(tokens, markup_func): + """ + Serialize the list of tokens into a list of text chunks, calling + markup_func around text to add annotations. + """ + for token in tokens: + yield from token.pre_tags + html = token.html() + html = markup_func(html, token.annotation) + if token.trailing_whitespace: + html += token.trailing_whitespace + yield html + yield from token.post_tags + + +############################################################ +## HTML Diffs +############################################################ + +def htmldiff(old_html, new_html): + ## FIXME: this should take parsed documents too, and use their body + ## or other content. + """ Do a diff of the old and new document. The documents are HTML + *fragments* (str/UTF8 or unicode), they are not complete documents + (i.e., no <html> tag). + + Returns HTML with <ins> and <del> tags added around the + appropriate text. + + Markup is generally ignored, with the markup from new_html + preserved, and possibly some markup from old_html (though it is + considered acceptable to lose some of the old markup). Only the + words in the HTML are diffed. The exception is <img> tags, which + are treated like words, and the href attribute of <a> tags, which + are noted inside the tag itself when there are changes. + """ + old_html_tokens = tokenize(old_html) + new_html_tokens = tokenize(new_html) + result = htmldiff_tokens(old_html_tokens, new_html_tokens) + result = ''.join(result).strip() + return fixup_ins_del_tags(result) + +def htmldiff_tokens(html1_tokens, html2_tokens): + """ Does a diff on the tokens themselves, returning a list of text + chunks (not tokens). + """ + # There are several passes as we do the differences. The tokens + # isolate the portion of the content we care to diff; difflib does + # all the actual hard work at that point. + # + # Then we must create a valid document from pieces of both the old + # document and the new document. We generally prefer to take + # markup from the new document, and only do a best effort attempt + # to keep markup from the old document; anything that we can't + # resolve we throw away. Also we try to put the deletes as close + # to the location where we think they would have been -- because + # we are only keeping the markup from the new document, it can be + # fuzzy where in the new document the old text would have gone. + # Again we just do a best effort attempt. + s = InsensitiveSequenceMatcher(a=html1_tokens, b=html2_tokens) + commands = s.get_opcodes() + result = [] + for command, i1, i2, j1, j2 in commands: + if command == 'equal': + result.extend(expand_tokens(html2_tokens[j1:j2], equal=True)) + continue + if command == 'insert' or command == 'replace': + ins_tokens = expand_tokens(html2_tokens[j1:j2]) + merge_insert(ins_tokens, result) + if command == 'delete' or command == 'replace': + del_tokens = expand_tokens(html1_tokens[i1:i2]) + merge_delete(del_tokens, result) + # If deletes were inserted directly as <del> then we'd have an + # invalid document at this point. Instead we put in special + # markers, and when the complete diffed document has been created + # we try to move the deletes around and resolve any problems. + result = cleanup_delete(result) + + return result + +def expand_tokens(tokens, equal=False): + """Given a list of tokens, return a generator of the chunks of + text for the data in the tokens. + """ + for token in tokens: + yield from token.pre_tags + if not equal or not token.hide_when_equal: + if token.trailing_whitespace: + yield token.html() + token.trailing_whitespace + else: + yield token.html() + yield from token.post_tags + +def merge_insert(ins_chunks, doc): + """ doc is the already-handled document (as a list of text chunks); + here we add <ins>ins_chunks</ins> to the end of that. """ + # Though we don't throw away unbalanced_start or unbalanced_end + # (we assume there is accompanying markup later or earlier in the + # document), we only put <ins> around the balanced portion. + unbalanced_start, balanced, unbalanced_end = split_unbalanced(ins_chunks) + doc.extend(unbalanced_start) + if doc and not doc[-1].endswith(' '): + # Fix up the case where the word before the insert didn't end with + # a space + doc[-1] += ' ' + doc.append('<ins>') + if balanced and balanced[-1].endswith(' '): + # We move space outside of </ins> + balanced[-1] = balanced[-1][:-1] + doc.extend(balanced) + doc.append('</ins> ') + doc.extend(unbalanced_end) + +# These are sentinels to represent the start and end of a <del> +# segment, until we do the cleanup phase to turn them into proper +# markup: +class DEL_START: + pass +class DEL_END: + pass + +class NoDeletes(Exception): + """ Raised when the document no longer contains any pending deletes + (DEL_START/DEL_END) """ + +def merge_delete(del_chunks, doc): + """ Adds the text chunks in del_chunks to the document doc (another + list of text chunks) with marker to show it is a delete. + cleanup_delete later resolves these markers into <del> tags.""" + doc.append(DEL_START) + doc.extend(del_chunks) + doc.append(DEL_END) + +def cleanup_delete(chunks): + """ Cleans up any DEL_START/DEL_END markers in the document, replacing + them with <del></del>. To do this while keeping the document + valid, it may need to drop some tags (either start or end tags). + + It may also move the del into adjacent tags to try to move it to a + similar location where it was originally located (e.g., moving a + delete into preceding <div> tag, if the del looks like (DEL_START, + 'Text</div>', DEL_END)""" + while 1: + # Find a pending DEL_START/DEL_END, splitting the document + # into stuff-preceding-DEL_START, stuff-inside, and + # stuff-following-DEL_END + try: + pre_delete, delete, post_delete = split_delete(chunks) + except NoDeletes: + # Nothing found, we've cleaned up the entire doc + break + # The stuff-inside-DEL_START/END may not be well balanced + # markup. First we figure out what unbalanced portions there are: + unbalanced_start, balanced, unbalanced_end = split_unbalanced(delete) + # Then we move the span forward and/or backward based on these + # unbalanced portions: + locate_unbalanced_start(unbalanced_start, pre_delete, post_delete) + locate_unbalanced_end(unbalanced_end, pre_delete, post_delete) + doc = pre_delete + if doc and not doc[-1].endswith(' '): + # Fix up case where the word before us didn't have a trailing space + doc[-1] += ' ' + doc.append('<del>') + if balanced and balanced[-1].endswith(' '): + # We move space outside of </del> + balanced[-1] = balanced[-1][:-1] + doc.extend(balanced) + doc.append('</del> ') + doc.extend(post_delete) + chunks = doc + return chunks + +def split_unbalanced(chunks): + """Return (unbalanced_start, balanced, unbalanced_end), where each is + a list of text and tag chunks. + + unbalanced_start is a list of all the tags that are opened, but + not closed in this span. Similarly, unbalanced_end is a list of + tags that are closed but were not opened. Extracting these might + mean some reordering of the chunks.""" + start = [] + end = [] + tag_stack = [] + balanced = [] + for chunk in chunks: + if not chunk.startswith('<'): + balanced.append(chunk) + continue + endtag = chunk[1] == '/' + name = chunk.split()[0].strip('<>/') + if name in empty_tags: + balanced.append(chunk) + continue + if endtag: + if tag_stack and tag_stack[-1][0] == name: + balanced.append(chunk) + name, pos, tag = tag_stack.pop() + balanced[pos] = tag + elif tag_stack: + start.extend([tag for name, pos, tag in tag_stack]) + tag_stack = [] + end.append(chunk) + else: + end.append(chunk) + else: + tag_stack.append((name, len(balanced), chunk)) + balanced.append(None) + start.extend( + [chunk for name, pos, chunk in tag_stack]) + balanced = [chunk for chunk in balanced if chunk is not None] + return start, balanced, end + +def split_delete(chunks): + """ Returns (stuff_before_DEL_START, stuff_inside_DEL_START_END, + stuff_after_DEL_END). Returns the first case found (there may be + more DEL_STARTs in stuff_after_DEL_END). Raises NoDeletes if + there's no DEL_START found. """ + try: + pos = chunks.index(DEL_START) + except ValueError: + raise NoDeletes + pos2 = chunks.index(DEL_END) + return chunks[:pos], chunks[pos+1:pos2], chunks[pos2+1:] + +def locate_unbalanced_start(unbalanced_start, pre_delete, post_delete): + """ pre_delete and post_delete implicitly point to a place in the + document (where the two were split). This moves that point (by + popping items from one and pushing them onto the other). It moves + the point to try to find a place where unbalanced_start applies. + + As an example:: + + >>> unbalanced_start = ['<div>'] + >>> doc = ['<p>', 'Text', '</p>', '<div>', 'More Text', '</div>'] + >>> pre, post = doc[:3], doc[3:] + >>> pre, post + (['<p>', 'Text', '</p>'], ['<div>', 'More Text', '</div>']) + >>> locate_unbalanced_start(unbalanced_start, pre, post) + >>> pre, post + (['<p>', 'Text', '</p>', '<div>'], ['More Text', '</div>']) + + As you can see, we moved the point so that the dangling <div> that + we found will be effectively replaced by the div in the original + document. If this doesn't work out, we just throw away + unbalanced_start without doing anything. + """ + while 1: + if not unbalanced_start: + # We have totally succeeded in finding the position + break + finding = unbalanced_start[0] + finding_name = finding.split()[0].strip('<>') + if not post_delete: + break + next = post_delete[0] + if next is DEL_START or not next.startswith('<'): + # Reached a word, we can't move the delete text forward + break + if next[1] == '/': + # Reached a closing tag, can we go further? Maybe not... + break + name = next.split()[0].strip('<>') + if name == 'ins': + # Can't move into an insert + break + assert name != 'del', ( + "Unexpected delete tag: %r" % next) + if name == finding_name: + unbalanced_start.pop(0) + pre_delete.append(post_delete.pop(0)) + else: + # Found a tag that doesn't match + break + +def locate_unbalanced_end(unbalanced_end, pre_delete, post_delete): + """ like locate_unbalanced_start, except handling end tags and + possibly moving the point earlier in the document. """ + while 1: + if not unbalanced_end: + # Success + break + finding = unbalanced_end[-1] + finding_name = finding.split()[0].strip('<>/') + if not pre_delete: + break + next = pre_delete[-1] + if next is DEL_END or not next.startswith('</'): + # A word or a start tag + break + name = next.split()[0].strip('<>/') + if name == 'ins' or name == 'del': + # Can't move into an insert or delete + break + if name == finding_name: + unbalanced_end.pop() + post_delete.insert(0, pre_delete.pop()) + else: + # Found a tag that doesn't match + break + +class token(_unicode): + """ Represents a diffable token, generally a word that is displayed to + the user. Opening tags are attached to this token when they are + adjacent (pre_tags) and closing tags that follow the word + (post_tags). Some exceptions occur when there are empty tags + adjacent to a word, so there may be close tags in pre_tags, or + open tags in post_tags. + + We also keep track of whether the word was originally followed by + whitespace, even though we do not want to treat the word as + equivalent to a similar word that does not have a trailing + space.""" + + # When this is true, the token will be eliminated from the + # displayed diff if no change has occurred: + hide_when_equal = False + + def __new__(cls, text, pre_tags=None, post_tags=None, trailing_whitespace=""): + obj = _unicode.__new__(cls, text) + + if pre_tags is not None: + obj.pre_tags = pre_tags + else: + obj.pre_tags = [] + + if post_tags is not None: + obj.post_tags = post_tags + else: + obj.post_tags = [] + + obj.trailing_whitespace = trailing_whitespace + + return obj + + def __repr__(self): + return 'token(%s, %r, %r, %r)' % (_unicode.__repr__(self), self.pre_tags, + self.post_tags, self.trailing_whitespace) + + def html(self): + return _unicode(self) + +class tag_token(token): + + """ Represents a token that is actually a tag. Currently this is just + the <img> tag, which takes up visible space just like a word but + is only represented in a document by a tag. """ + + def __new__(cls, tag, data, html_repr, pre_tags=None, + post_tags=None, trailing_whitespace=""): + obj = token.__new__(cls, "%s: %s" % (type, data), + pre_tags=pre_tags, + post_tags=post_tags, + trailing_whitespace=trailing_whitespace) + obj.tag = tag + obj.data = data + obj.html_repr = html_repr + return obj + + def __repr__(self): + return 'tag_token(%s, %s, html_repr=%s, post_tags=%r, pre_tags=%r, trailing_whitespace=%r)' % ( + self.tag, + self.data, + self.html_repr, + self.pre_tags, + self.post_tags, + self.trailing_whitespace) + def html(self): + return self.html_repr + +class href_token(token): + + """ Represents the href in an anchor tag. Unlike other words, we only + show the href when it changes. """ + + hide_when_equal = True + + def html(self): + return ' Link: %s' % self + +def tokenize(html, include_hrefs=True): + """ + Parse the given HTML and returns token objects (words with attached tags). + + This parses only the content of a page; anything in the head is + ignored, and the <head> and <body> elements are themselves + optional. The content is then parsed by lxml, which ensures the + validity of the resulting parsed document (though lxml may make + incorrect guesses when the markup is particular bad). + + <ins> and <del> tags are also eliminated from the document, as + that gets confusing. + + If include_hrefs is true, then the href attribute of <a> tags is + included as a special kind of diffable token.""" + if etree.iselement(html): + body_el = html + else: + body_el = parse_html(html, cleanup=True) + # Then we split the document into text chunks for each tag, word, and end tag: + chunks = flatten_el(body_el, skip_tag=True, include_hrefs=include_hrefs) + # Finally re-joining them into token objects: + return fixup_chunks(chunks) + +def parse_html(html, cleanup=True): + """ + Parses an HTML fragment, returning an lxml element. Note that the HTML will be + wrapped in a <div> tag that was not in the original document. + + If cleanup is true, make sure there's no <head> or <body>, and get + rid of any <ins> and <del> tags. + """ + if cleanup: + # This removes any extra markup or structure like <head>: + html = cleanup_html(html) + return fragment_fromstring(html, create_parent=True) + +_body_re = re.compile(r'<body.*?>', re.I|re.S) +_end_body_re = re.compile(r'</body.*?>', re.I|re.S) +_ins_del_re = re.compile(r'</?(ins|del).*?>', re.I|re.S) + +def cleanup_html(html): + """ This 'cleans' the HTML, meaning that any page structure is removed + (only the contents of <body> are used, if there is any <body). + Also <ins> and <del> tags are removed. """ + match = _body_re.search(html) + if match: + html = html[match.end():] + match = _end_body_re.search(html) + if match: + html = html[:match.start()] + html = _ins_del_re.sub('', html) + return html + + +end_whitespace_re = re.compile(r'[ \t\n\r]$') + +def split_trailing_whitespace(word): + """ + This function takes a word, such as 'test\n\n' and returns ('test','\n\n') + """ + stripped_length = len(word.rstrip()) + return word[0:stripped_length], word[stripped_length:] + + +def fixup_chunks(chunks): + """ + This function takes a list of chunks and produces a list of tokens. + """ + tag_accum = [] + cur_word = None + result = [] + for chunk in chunks: + if isinstance(chunk, tuple): + if chunk[0] == 'img': + src = chunk[1] + tag, trailing_whitespace = split_trailing_whitespace(chunk[2]) + cur_word = tag_token('img', src, html_repr=tag, + pre_tags=tag_accum, + trailing_whitespace=trailing_whitespace) + tag_accum = [] + result.append(cur_word) + + elif chunk[0] == 'href': + href = chunk[1] + cur_word = href_token(href, pre_tags=tag_accum, trailing_whitespace=" ") + tag_accum = [] + result.append(cur_word) + continue + + if is_word(chunk): + chunk, trailing_whitespace = split_trailing_whitespace(chunk) + cur_word = token(chunk, pre_tags=tag_accum, trailing_whitespace=trailing_whitespace) + tag_accum = [] + result.append(cur_word) + + elif is_start_tag(chunk): + tag_accum.append(chunk) + + elif is_end_tag(chunk): + if tag_accum: + tag_accum.append(chunk) + else: + assert cur_word, ( + "Weird state, cur_word=%r, result=%r, chunks=%r of %r" + % (cur_word, result, chunk, chunks)) + cur_word.post_tags.append(chunk) + else: + assert False + + if not result: + return [token('', pre_tags=tag_accum)] + else: + result[-1].post_tags.extend(tag_accum) + + return result + + +# All the tags in HTML that don't require end tags: +empty_tags = ( + 'param', 'img', 'area', 'br', 'basefont', 'input', + 'base', 'meta', 'link', 'col') + +block_level_tags = ( + 'address', + 'blockquote', + 'center', + 'dir', + 'div', + 'dl', + 'fieldset', + 'form', + 'h1', + 'h2', + 'h3', + 'h4', + 'h5', + 'h6', + 'hr', + 'isindex', + 'menu', + 'noframes', + 'noscript', + 'ol', + 'p', + 'pre', + 'table', + 'ul', + ) + +block_level_container_tags = ( + 'dd', + 'dt', + 'frameset', + 'li', + 'tbody', + 'td', + 'tfoot', + 'th', + 'thead', + 'tr', + ) + + +def flatten_el(el, include_hrefs, skip_tag=False): + """ Takes an lxml element el, and generates all the text chunks for + that tag. Each start tag is a chunk, each word is a chunk, and each + end tag is a chunk. + + If skip_tag is true, then the outermost container tag is + not returned (just its contents).""" + if not skip_tag: + if el.tag == 'img': + yield ('img', el.get('src'), start_tag(el)) + else: + yield start_tag(el) + if el.tag in empty_tags and not el.text and not len(el) and not el.tail: + return + start_words = split_words(el.text) + for word in start_words: + yield html_escape(word) + for child in el: + yield from flatten_el(child, include_hrefs=include_hrefs) + if el.tag == 'a' and el.get('href') and include_hrefs: + yield ('href', el.get('href')) + if not skip_tag: + yield end_tag(el) + end_words = split_words(el.tail) + for word in end_words: + yield html_escape(word) + +split_words_re = re.compile(r'\S+(?:\s+|$)', re.U) + +def split_words(text): + """ Splits some text into words. Includes trailing whitespace + on each word when appropriate. """ + if not text or not text.strip(): + return [] + + words = split_words_re.findall(text) + return words + +start_whitespace_re = re.compile(r'^[ \t\n\r]') + +def start_tag(el): + """ + The text representation of the start tag for a tag. + """ + return '<%s%s>' % ( + el.tag, ''.join([' %s="%s"' % (name, html_escape(value, True)) + for name, value in el.attrib.items()])) + +def end_tag(el): + """ The text representation of an end tag for a tag. Includes + trailing whitespace when appropriate. """ + if el.tail and start_whitespace_re.search(el.tail): + extra = ' ' + else: + extra = '' + return '</%s>%s' % (el.tag, extra) + +def is_word(tok): + return not tok.startswith('<') + +def is_end_tag(tok): + return tok.startswith('</') + +def is_start_tag(tok): + return tok.startswith('<') and not tok.startswith('</') + +def fixup_ins_del_tags(html): + """ Given an html string, move any <ins> or <del> tags inside of any + block-level elements, e.g. transform <ins><p>word</p></ins> to + <p><ins>word</ins></p> """ + doc = parse_html(html, cleanup=False) + _fixup_ins_del_tags(doc) + html = serialize_html_fragment(doc, skip_outer=True) + return html + +def serialize_html_fragment(el, skip_outer=False): + """ Serialize a single lxml element as HTML. The serialized form + includes the elements tail. + + If skip_outer is true, then don't serialize the outermost tag + """ + assert not isinstance(el, basestring), ( + "You should pass in an element, not a string like %r" % el) + html = etree.tostring(el, method="html", encoding=_unicode) + if skip_outer: + # Get rid of the extra starting tag: + html = html[html.find('>')+1:] + # Get rid of the extra end tag: + html = html[:html.rfind('<')] + return html.strip() + else: + return html + +def _fixup_ins_del_tags(doc): + """fixup_ins_del_tags that works on an lxml document in-place + """ + for tag in ['ins', 'del']: + for el in doc.xpath('descendant-or-self::%s' % tag): + if not _contains_block_level_tag(el): + continue + _move_el_inside_block(el, tag=tag) + el.drop_tag() + #_merge_element_contents(el) + +def _contains_block_level_tag(el): + """True if the element contains any block-level elements, like <p>, <td>, etc. + """ + if el.tag in block_level_tags or el.tag in block_level_container_tags: + return True + for child in el: + if _contains_block_level_tag(child): + return True + return False + +def _move_el_inside_block(el, tag): + """ helper for _fixup_ins_del_tags; actually takes the <ins> etc tags + and moves them inside any block-level tags. """ + for child in el: + if _contains_block_level_tag(child): + break + else: + # No block-level tags in any child + children_tag = etree.Element(tag) + children_tag.text = el.text + el.text = None + children_tag.extend(list(el)) + el[:] = [children_tag] + return + for child in list(el): + if _contains_block_level_tag(child): + _move_el_inside_block(child, tag) + if child.tail: + tail_tag = etree.Element(tag) + tail_tag.text = child.tail + child.tail = None + el.insert(el.index(child)+1, tail_tag) + else: + child_tag = etree.Element(tag) + el.replace(child, child_tag) + child_tag.append(child) + if el.text: + text_tag = etree.Element(tag) + text_tag.text = el.text + el.text = None + el.insert(0, text_tag) + +def _merge_element_contents(el): + """ + Removes an element, but merges its contents into its place, e.g., + given <p>Hi <i>there!</i></p>, if you remove the <i> element you get + <p>Hi there!</p> + """ + parent = el.getparent() + text = el.text or '' + if el.tail: + if not len(el): + text += el.tail + else: + if el[-1].tail: + el[-1].tail += el.tail + else: + el[-1].tail = el.tail + index = parent.index(el) + if text: + if index == 0: + previous = None + else: + previous = parent[index-1] + if previous is None: + if parent.text: + parent.text += text + else: + parent.text = text + else: + if previous.tail: + previous.tail += text + else: + previous.tail = text + parent[index:index+1] = el.getchildren() + +class InsensitiveSequenceMatcher(difflib.SequenceMatcher): + """ + Acts like SequenceMatcher, but tries not to find very small equal + blocks amidst large spans of changes + """ + + threshold = 2 + + def get_matching_blocks(self): + size = min(len(self.b), len(self.b)) + threshold = min(self.threshold, size / 4) + actual = difflib.SequenceMatcher.get_matching_blocks(self) + return [item for item in actual + if item[2] > threshold + or not item[2]] + +if __name__ == '__main__': + from lxml.html import _diffcommand + _diffcommand.main() + diff --git a/.venv/lib/python3.12/site-packages/lxml/html/formfill.py b/.venv/lib/python3.12/site-packages/lxml/html/formfill.py new file mode 100644 index 00000000..9741c28b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/formfill.py @@ -0,0 +1,299 @@ +from lxml.etree import XPath, ElementBase +from lxml.html import fromstring, XHTML_NAMESPACE +from lxml.html import _forms_xpath, _options_xpath, _nons, _transform_result +from lxml.html import defs +import copy + +try: + basestring +except NameError: + # Python 3 + basestring = str + +__all__ = ['FormNotFound', 'fill_form', 'fill_form_html', + 'insert_errors', 'insert_errors_html', + 'DefaultErrorCreator'] + +class FormNotFound(LookupError): + """ + Raised when no form can be found + """ + +_form_name_xpath = XPath('descendant-or-self::form[name=$name]|descendant-or-self::x:form[name=$name]', namespaces={'x':XHTML_NAMESPACE}) +_input_xpath = XPath('|'.join(['descendant-or-self::'+_tag for _tag in ('input','select','textarea','x:input','x:select','x:textarea')]), + namespaces={'x':XHTML_NAMESPACE}) +_label_for_xpath = XPath('//label[@for=$for_id]|//x:label[@for=$for_id]', + namespaces={'x':XHTML_NAMESPACE}) +_name_xpath = XPath('descendant-or-self::*[@name=$name]') + +def fill_form( + el, + values, + form_id=None, + form_index=None, + ): + el = _find_form(el, form_id=form_id, form_index=form_index) + _fill_form(el, values) + +def fill_form_html(html, values, form_id=None, form_index=None): + result_type = type(html) + if isinstance(html, basestring): + doc = fromstring(html) + else: + doc = copy.deepcopy(html) + fill_form(doc, values, form_id=form_id, form_index=form_index) + return _transform_result(result_type, doc) + +def _fill_form(el, values): + counts = {} + if hasattr(values, 'mixed'): + # For Paste request parameters + values = values.mixed() + inputs = _input_xpath(el) + for input in inputs: + name = input.get('name') + if not name: + continue + if _takes_multiple(input): + value = values.get(name, []) + if not isinstance(value, (list, tuple)): + value = [value] + _fill_multiple(input, value) + elif name not in values: + continue + else: + index = counts.get(name, 0) + counts[name] = index + 1 + value = values[name] + if isinstance(value, (list, tuple)): + try: + value = value[index] + except IndexError: + continue + elif index > 0: + continue + _fill_single(input, value) + +def _takes_multiple(input): + if _nons(input.tag) == 'select' and input.get('multiple'): + # FIXME: multiple="0"? + return True + type = input.get('type', '').lower() + if type in ('radio', 'checkbox'): + return True + return False + +def _fill_multiple(input, value): + type = input.get('type', '').lower() + if type == 'checkbox': + v = input.get('value') + if v is None: + if not value: + result = False + else: + result = value[0] + if isinstance(value, basestring): + # The only valid "on" value for an unnamed checkbox is 'on' + result = result == 'on' + _check(input, result) + else: + _check(input, v in value) + elif type == 'radio': + v = input.get('value') + _check(input, v in value) + else: + assert _nons(input.tag) == 'select' + for option in _options_xpath(input): + v = option.get('value') + if v is None: + # This seems to be the default, at least on IE + # FIXME: but I'm not sure + v = option.text_content() + _select(option, v in value) + +def _check(el, check): + if check: + el.set('checked', '') + else: + if 'checked' in el.attrib: + del el.attrib['checked'] + +def _select(el, select): + if select: + el.set('selected', '') + else: + if 'selected' in el.attrib: + del el.attrib['selected'] + +def _fill_single(input, value): + if _nons(input.tag) == 'textarea': + input.text = value + else: + input.set('value', value) + +def _find_form(el, form_id=None, form_index=None): + if form_id is None and form_index is None: + forms = _forms_xpath(el) + for form in forms: + return form + raise FormNotFound( + "No forms in page") + if form_id is not None: + form = el.get_element_by_id(form_id) + if form is not None: + return form + forms = _form_name_xpath(el, name=form_id) + if forms: + return forms[0] + else: + raise FormNotFound( + "No form with the name or id of %r (forms: %s)" + % (id, ', '.join(_find_form_ids(el)))) + if form_index is not None: + forms = _forms_xpath(el) + try: + return forms[form_index] + except IndexError: + raise FormNotFound( + "There is no form with the index %r (%i forms found)" + % (form_index, len(forms))) + +def _find_form_ids(el): + forms = _forms_xpath(el) + if not forms: + yield '(no forms)' + return + for index, form in enumerate(forms): + if form.get('id'): + if form.get('name'): + yield '%s or %s' % (form.get('id'), + form.get('name')) + else: + yield form.get('id') + elif form.get('name'): + yield form.get('name') + else: + yield '(unnamed form %s)' % index + +############################################################ +## Error filling +############################################################ + +class DefaultErrorCreator: + insert_before = True + block_inside = True + error_container_tag = 'div' + error_message_class = 'error-message' + error_block_class = 'error-block' + default_message = "Invalid" + + def __init__(self, **kw): + for name, value in kw.items(): + if not hasattr(self, name): + raise TypeError( + "Unexpected keyword argument: %s" % name) + setattr(self, name, value) + + def __call__(self, el, is_block, message): + error_el = el.makeelement(self.error_container_tag) + if self.error_message_class: + error_el.set('class', self.error_message_class) + if is_block and self.error_block_class: + error_el.set('class', error_el.get('class', '')+' '+self.error_block_class) + if message is None or message == '': + message = self.default_message + if isinstance(message, ElementBase): + error_el.append(message) + else: + assert isinstance(message, basestring), ( + "Bad message; should be a string or element: %r" % message) + error_el.text = message or self.default_message + if is_block and self.block_inside: + if self.insert_before: + error_el.tail = el.text + el.text = None + el.insert(0, error_el) + else: + el.append(error_el) + else: + parent = el.getparent() + pos = parent.index(el) + if self.insert_before: + parent.insert(pos, error_el) + else: + error_el.tail = el.tail + el.tail = None + parent.insert(pos+1, error_el) + +default_error_creator = DefaultErrorCreator() + + +def insert_errors( + el, + errors, + form_id=None, + form_index=None, + error_class="error", + error_creator=default_error_creator, + ): + el = _find_form(el, form_id=form_id, form_index=form_index) + for name, error in errors.items(): + if error is None: + continue + for error_el, message in _find_elements_for_name(el, name, error): + assert isinstance(message, (basestring, type(None), ElementBase)), ( + "Bad message: %r" % message) + _insert_error(error_el, message, error_class, error_creator) + +def insert_errors_html(html, values, **kw): + result_type = type(html) + if isinstance(html, basestring): + doc = fromstring(html) + else: + doc = copy.deepcopy(html) + insert_errors(doc, values, **kw) + return _transform_result(result_type, doc) + +def _insert_error(el, error, error_class, error_creator): + if _nons(el.tag) in defs.empty_tags or _nons(el.tag) == 'textarea': + is_block = False + else: + is_block = True + if _nons(el.tag) != 'form' and error_class: + _add_class(el, error_class) + if el.get('id'): + labels = _label_for_xpath(el, for_id=el.get('id')) + if labels: + for label in labels: + _add_class(label, error_class) + error_creator(el, is_block, error) + +def _add_class(el, class_name): + if el.get('class'): + el.set('class', el.get('class')+' '+class_name) + else: + el.set('class', class_name) + +def _find_elements_for_name(form, name, error): + if name is None: + # An error for the entire form + yield form, error + return + if name.startswith('#'): + # By id + el = form.get_element_by_id(name[1:]) + if el is not None: + yield el, error + return + els = _name_xpath(form, name=name) + if not els: + # FIXME: should this raise an exception? + return + if not isinstance(error, (list, tuple)): + yield els[0], error + return + # FIXME: if error is longer than els, should it raise an error? + for el, err in zip(els, error): + if err is None: + continue + yield el, err diff --git a/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py b/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py new file mode 100644 index 00000000..2f7be156 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py @@ -0,0 +1,260 @@ +""" +An interface to html5lib that mimics the lxml.html interface. +""" +import sys +import string + +from html5lib import HTMLParser as _HTMLParser +from html5lib.treebuilders.etree_lxml import TreeBuilder +from lxml import etree +from lxml.html import Element, XHTML_NAMESPACE, _contains_block_level_tag + +# python3 compatibility +try: + _strings = basestring +except NameError: + _strings = (bytes, str) +try: + from urllib2 import urlopen +except ImportError: + from urllib.request import urlopen +try: + from urlparse import urlparse +except ImportError: + from urllib.parse import urlparse + + +class HTMLParser(_HTMLParser): + """An html5lib HTML parser with lxml as tree.""" + + def __init__(self, strict=False, **kwargs): + _HTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs) + + +try: + from html5lib import XHTMLParser as _XHTMLParser +except ImportError: + pass +else: + class XHTMLParser(_XHTMLParser): + """An html5lib XHTML Parser with lxml as tree.""" + + def __init__(self, strict=False, **kwargs): + _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs) + + xhtml_parser = XHTMLParser() + + +def _find_tag(tree, tag): + elem = tree.find(tag) + if elem is not None: + return elem + return tree.find('{%s}%s' % (XHTML_NAMESPACE, tag)) + + +def document_fromstring(html, guess_charset=None, parser=None): + """ + Parse a whole document into a string. + + If `guess_charset` is true, or if the input is not Unicode but a + byte string, the `chardet` library will perform charset guessing + on the string. + """ + if not isinstance(html, _strings): + raise TypeError('string required') + + if parser is None: + parser = html_parser + + options = {} + if guess_charset is None and isinstance(html, bytes): + # html5lib does not accept useChardet as an argument, if it + # detected the html argument would produce unicode objects. + guess_charset = True + if guess_charset is not None: + options['useChardet'] = guess_charset + return parser.parse(html, **options).getroot() + + +def fragments_fromstring(html, no_leading_text=False, + guess_charset=None, parser=None): + """Parses several HTML elements, returning a list of elements. + + The first item in the list may be a string. If no_leading_text is true, + then it will be an error if there is leading text, and it will always be + a list of only elements. + + If `guess_charset` is true, the `chardet` library will perform charset + guessing on the string. + """ + if not isinstance(html, _strings): + raise TypeError('string required') + + if parser is None: + parser = html_parser + + options = {} + if guess_charset is None and isinstance(html, bytes): + # html5lib does not accept useChardet as an argument, if it + # detected the html argument would produce unicode objects. + guess_charset = False + if guess_charset is not None: + options['useChardet'] = guess_charset + children = parser.parseFragment(html, 'div', **options) + if children and isinstance(children[0], _strings): + if no_leading_text: + if children[0].strip(): + raise etree.ParserError('There is leading text: %r' % + children[0]) + del children[0] + return children + + +def fragment_fromstring(html, create_parent=False, + guess_charset=None, parser=None): + """Parses a single HTML element; it is an error if there is more than + one element, or if anything but whitespace precedes or follows the + element. + + If 'create_parent' is true (or is a tag name) then a parent node + will be created to encapsulate the HTML in a single element. In + this case, leading or trailing text is allowed. + + If `guess_charset` is true, the `chardet` library will perform charset + guessing on the string. + """ + if not isinstance(html, _strings): + raise TypeError('string required') + + accept_leading_text = bool(create_parent) + + elements = fragments_fromstring( + html, guess_charset=guess_charset, parser=parser, + no_leading_text=not accept_leading_text) + + if create_parent: + if not isinstance(create_parent, _strings): + create_parent = 'div' + new_root = Element(create_parent) + if elements: + if isinstance(elements[0], _strings): + new_root.text = elements[0] + del elements[0] + new_root.extend(elements) + return new_root + + if not elements: + raise etree.ParserError('No elements found') + if len(elements) > 1: + raise etree.ParserError('Multiple elements found') + result = elements[0] + if result.tail and result.tail.strip(): + raise etree.ParserError('Element followed by text: %r' % result.tail) + result.tail = None + return result + + +def fromstring(html, guess_charset=None, parser=None): + """Parse the html, returning a single element/document. + + This tries to minimally parse the chunk of text, without knowing if it + is a fragment or a document. + + 'base_url' will set the document's base_url attribute (and the tree's + docinfo.URL) + + If `guess_charset` is true, or if the input is not Unicode but a + byte string, the `chardet` library will perform charset guessing + on the string. + """ + if not isinstance(html, _strings): + raise TypeError('string required') + doc = document_fromstring(html, parser=parser, + guess_charset=guess_charset) + + # document starts with doctype or <html>, full document! + start = html[:50] + if isinstance(start, bytes): + # Allow text comparison in python3. + # Decode as ascii, that also covers latin-1 and utf-8 for the + # characters we need. + start = start.decode('ascii', 'replace') + + start = start.lstrip().lower() + if start.startswith('<html') or start.startswith('<!doctype'): + return doc + + head = _find_tag(doc, 'head') + + # if the head is not empty we have a full document + if len(head): + return doc + + body = _find_tag(doc, 'body') + + # The body has just one element, so it was probably a single + # element passed in + if (len(body) == 1 and (not body.text or not body.text.strip()) + and (not body[-1].tail or not body[-1].tail.strip())): + return body[0] + + # Now we have a body which represents a bunch of tags which have the + # content that was passed in. We will create a fake container, which + # is the body tag, except <body> implies too much structure. + if _contains_block_level_tag(body): + body.tag = 'div' + else: + body.tag = 'span' + return body + + +def parse(filename_url_or_file, guess_charset=None, parser=None): + """Parse a filename, URL, or file-like object into an HTML document + tree. Note: this returns a tree, not an element. Use + ``parse(...).getroot()`` to get the document root. + + If ``guess_charset`` is true, the ``useChardet`` option is passed into + html5lib to enable character detection. This option is on by default + when parsing from URLs, off by default when parsing from file(-like) + objects (which tend to return Unicode more often than not), and on by + default when parsing from a file path (which is read in binary mode). + """ + if parser is None: + parser = html_parser + if not isinstance(filename_url_or_file, _strings): + fp = filename_url_or_file + if guess_charset is None: + # assume that file-like objects return Unicode more often than bytes + guess_charset = False + elif _looks_like_url(filename_url_or_file): + fp = urlopen(filename_url_or_file) + if guess_charset is None: + # assume that URLs return bytes + guess_charset = True + else: + fp = open(filename_url_or_file, 'rb') + if guess_charset is None: + guess_charset = True + + options = {} + # html5lib does not accept useChardet as an argument, if it + # detected the html argument would produce unicode objects. + if guess_charset: + options['useChardet'] = guess_charset + return parser.parse(fp, **options) + + +def _looks_like_url(str): + scheme = urlparse(str)[0] + if not scheme: + return False + elif (sys.platform == 'win32' and + scheme in string.ascii_letters + and len(scheme) == 1): + # looks like a 'normal' absolute path + return False + else: + return True + + +html_parser = HTMLParser() diff --git a/.venv/lib/python3.12/site-packages/lxml/html/soupparser.py b/.venv/lib/python3.12/site-packages/lxml/html/soupparser.py new file mode 100644 index 00000000..b288a8a1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/soupparser.py @@ -0,0 +1,314 @@ +"""External interface to the BeautifulSoup HTML parser. +""" + +__all__ = ["fromstring", "parse", "convert_tree"] + +import re +from lxml import etree, html + +try: + from bs4 import ( + BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString, + Declaration, Doctype) + _DECLARATION_OR_DOCTYPE = (Declaration, Doctype) +except ImportError: + from BeautifulSoup import ( + BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString, + Declaration) + _DECLARATION_OR_DOCTYPE = Declaration + + +def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs): + """Parse a string of HTML data into an Element tree using the + BeautifulSoup parser. + + Returns the root ``<html>`` Element of the tree. + + You can pass a different BeautifulSoup parser through the + `beautifulsoup` keyword, and a diffent Element factory function + through the `makeelement` keyword. By default, the standard + ``BeautifulSoup`` class and the default factory of `lxml.html` are + used. + """ + return _parse(data, beautifulsoup, makeelement, **bsargs) + + +def parse(file, beautifulsoup=None, makeelement=None, **bsargs): + """Parse a file into an ElemenTree using the BeautifulSoup parser. + + You can pass a different BeautifulSoup parser through the + `beautifulsoup` keyword, and a diffent Element factory function + through the `makeelement` keyword. By default, the standard + ``BeautifulSoup`` class and the default factory of `lxml.html` are + used. + """ + if not hasattr(file, 'read'): + file = open(file) + root = _parse(file, beautifulsoup, makeelement, **bsargs) + return etree.ElementTree(root) + + +def convert_tree(beautiful_soup_tree, makeelement=None): + """Convert a BeautifulSoup tree to a list of Element trees. + + Returns a list instead of a single root Element to support + HTML-like soup with more than one root element. + + You can pass a different Element factory through the `makeelement` + keyword. + """ + root = _convert_tree(beautiful_soup_tree, makeelement) + children = root.getchildren() + for child in children: + root.remove(child) + return children + + +# helpers + +def _parse(source, beautifulsoup, makeelement, **bsargs): + if beautifulsoup is None: + beautifulsoup = BeautifulSoup + if hasattr(beautifulsoup, "HTML_ENTITIES"): # bs3 + if 'convertEntities' not in bsargs: + bsargs['convertEntities'] = 'html' + if hasattr(beautifulsoup, "DEFAULT_BUILDER_FEATURES"): # bs4 + if 'features' not in bsargs: + bsargs['features'] = 'html.parser' # use Python html parser + tree = beautifulsoup(source, **bsargs) + root = _convert_tree(tree, makeelement) + # from ET: wrap the document in a html root element, if necessary + if len(root) == 1 and root[0].tag == "html": + return root[0] + root.tag = "html" + return root + + +_parse_doctype_declaration = re.compile( + r'(?:\s|[<!])*DOCTYPE\s*HTML' + r'(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?' + r'(?:\s+(\'[^\']*\'|"[^"]*"))?', + re.IGNORECASE).match + + +class _PseudoTag: + # Minimal imitation of BeautifulSoup.Tag + def __init__(self, contents): + self.name = 'html' + self.attrs = [] + self.contents = contents + + def __iter__(self): + return self.contents.__iter__() + + +def _convert_tree(beautiful_soup_tree, makeelement): + if makeelement is None: + makeelement = html.html_parser.makeelement + + # Split the tree into three parts: + # i) everything before the root element: document type + # declaration, comments, processing instructions, whitespace + # ii) the root(s), + # iii) everything after the root: comments, processing + # instructions, whitespace + first_element_idx = last_element_idx = None + html_root = declaration = None + for i, e in enumerate(beautiful_soup_tree): + if isinstance(e, Tag): + if first_element_idx is None: + first_element_idx = i + last_element_idx = i + if html_root is None and e.name and e.name.lower() == 'html': + html_root = e + elif declaration is None and isinstance(e, _DECLARATION_OR_DOCTYPE): + declaration = e + + # For a nice, well-formatted document, the variable roots below is + # a list consisting of a single <html> element. However, the document + # may be a soup like '<meta><head><title>Hello</head><body>Hi + # all<\p>'. In this example roots is a list containing meta, head + # and body elements. + if first_element_idx is None: + pre_root = post_root = [] + roots = beautiful_soup_tree.contents + else: + pre_root = beautiful_soup_tree.contents[:first_element_idx] + roots = beautiful_soup_tree.contents[first_element_idx:last_element_idx+1] + post_root = beautiful_soup_tree.contents[last_element_idx+1:] + + # Reorganize so that there is one <html> root... + if html_root is not None: + # ... use existing one if possible, ... + i = roots.index(html_root) + html_root.contents = roots[:i] + html_root.contents + roots[i+1:] + else: + # ... otherwise create a new one. + html_root = _PseudoTag(roots) + + convert_node = _init_node_converters(makeelement) + + # Process pre_root + res_root = convert_node(html_root) + prev = res_root + for e in reversed(pre_root): + converted = convert_node(e) + if converted is not None: + prev.addprevious(converted) + prev = converted + + # ditto for post_root + prev = res_root + for e in post_root: + converted = convert_node(e) + if converted is not None: + prev.addnext(converted) + prev = converted + + if declaration is not None: + try: + # bs4 provides full Doctype string + doctype_string = declaration.output_ready() + except AttributeError: + doctype_string = declaration.string + + match = _parse_doctype_declaration(doctype_string) + if not match: + # Something is wrong if we end up in here. Since soupparser should + # tolerate errors, do not raise Exception, just let it pass. + pass + else: + external_id, sys_uri = match.groups() + docinfo = res_root.getroottree().docinfo + # strip quotes and update DOCTYPE values (any of None, '', '...') + docinfo.public_id = external_id and external_id[1:-1] + docinfo.system_url = sys_uri and sys_uri[1:-1] + + return res_root + + +def _init_node_converters(makeelement): + converters = {} + ordered_node_types = [] + + def converter(*types): + def add(handler): + for t in types: + converters[t] = handler + ordered_node_types.append(t) + return handler + return add + + def find_best_converter(node): + for t in ordered_node_types: + if isinstance(node, t): + return converters[t] + return None + + def convert_node(bs_node, parent=None): + # duplicated in convert_tag() below + try: + handler = converters[type(bs_node)] + except KeyError: + handler = converters[type(bs_node)] = find_best_converter(bs_node) + if handler is None: + return None + return handler(bs_node, parent) + + def map_attrs(bs_attrs): + if isinstance(bs_attrs, dict): # bs4 + attribs = {} + for k, v in bs_attrs.items(): + if isinstance(v, list): + v = " ".join(v) + attribs[k] = unescape(v) + else: + attribs = {k: unescape(v) for k, v in bs_attrs} + return attribs + + def append_text(parent, text): + if len(parent) == 0: + parent.text = (parent.text or '') + text + else: + parent[-1].tail = (parent[-1].tail or '') + text + + # converters are tried in order of their definition + + @converter(Tag, _PseudoTag) + def convert_tag(bs_node, parent): + attrs = bs_node.attrs + if parent is not None: + attribs = map_attrs(attrs) if attrs else None + res = etree.SubElement(parent, bs_node.name, attrib=attribs) + else: + attribs = map_attrs(attrs) if attrs else {} + res = makeelement(bs_node.name, attrib=attribs) + + for child in bs_node: + # avoid double recursion by inlining convert_node(), see above + try: + handler = converters[type(child)] + except KeyError: + pass + else: + if handler is not None: + handler(child, res) + continue + convert_node(child, res) + return res + + @converter(Comment) + def convert_comment(bs_node, parent): + res = html.HtmlComment(bs_node) + if parent is not None: + parent.append(res) + return res + + @converter(ProcessingInstruction) + def convert_pi(bs_node, parent): + if bs_node.endswith('?'): + # The PI is of XML style (<?as df?>) but BeautifulSoup + # interpreted it as being SGML style (<?as df>). Fix. + bs_node = bs_node[:-1] + res = etree.ProcessingInstruction(*bs_node.split(' ', 1)) + if parent is not None: + parent.append(res) + return res + + @converter(NavigableString) + def convert_text(bs_node, parent): + if parent is not None: + append_text(parent, unescape(bs_node)) + return None + + return convert_node + + +# copied from ET's ElementSoup + +try: + from html.entities import name2codepoint # Python 3 +except ImportError: + from htmlentitydefs import name2codepoint + + +handle_entities = re.compile(r"&(\w+);").sub + + +try: + unichr +except NameError: + # Python 3 + unichr = chr + + +def unescape(string): + if not string: + return '' + # work around oddities in BeautifulSoup's entity handling + def unescape_entity(m): + try: + return unichr(name2codepoint[m.group(1)]) + except KeyError: + return m.group(0) # use as is + return handle_entities(unescape_entity, string) diff --git a/.venv/lib/python3.12/site-packages/lxml/html/usedoctest.py b/.venv/lib/python3.12/site-packages/lxml/html/usedoctest.py new file mode 100644 index 00000000..f352a1cc --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/html/usedoctest.py @@ -0,0 +1,13 @@ +"""Doctest module for HTML comparison. + +Usage:: + + >>> import lxml.html.usedoctest + >>> # now do your HTML doctests ... + +See `lxml.doctestcompare`. +""" + +from lxml import doctestcompare + +doctestcompare.temp_install(html=True, del_module=__name__) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/__init__.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/__init__.pxd new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/__init__.pxd diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/__init__.py b/.venv/lib/python3.12/site-packages/lxml/includes/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/__init__.py diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/c14n.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/c14n.pxd new file mode 100644 index 00000000..8b1f3c4c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/c14n.pxd @@ -0,0 +1,25 @@ +from lxml.includes.tree cimport xmlDoc, xmlOutputBuffer, xmlChar +from lxml.includes.xpath cimport xmlNodeSet + +cdef extern from "libxml/c14n.h" nogil: + cdef int xmlC14NDocDumpMemory(xmlDoc* doc, + xmlNodeSet* nodes, + int exclusive, + xmlChar** inclusive_ns_prefixes, + int with_comments, + xmlChar** doc_txt_ptr) + + cdef int xmlC14NDocSave(xmlDoc* doc, + xmlNodeSet* nodes, + int exclusive, + xmlChar** inclusive_ns_prefixes, + int with_comments, + char* filename, + int compression) + + cdef int xmlC14NDocSaveTo(xmlDoc* doc, + xmlNodeSet* nodes, + int exclusive, + xmlChar** inclusive_ns_prefixes, + int with_comments, + xmlOutputBuffer* buffer) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/config.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/config.pxd new file mode 100644 index 00000000..9c04438f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/config.pxd @@ -0,0 +1,3 @@ +cdef extern from "etree_defs.h": + cdef bint ENABLE_THREADING + cdef bint ENABLE_SCHEMATRON diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/dtdvalid.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/dtdvalid.pxd new file mode 100644 index 00000000..2ad49db1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/dtdvalid.pxd @@ -0,0 +1,18 @@ +from lxml.includes cimport tree +from lxml.includes.tree cimport xmlDoc, xmlDtd + +cdef extern from "libxml/valid.h" nogil: + ctypedef void (*xmlValidityErrorFunc)(void * ctx, const char * msg, ...) noexcept + ctypedef void (*xmlValidityWarningFunc)(void * ctx, const char * msg, ...) noexcept + + ctypedef struct xmlValidCtxt: + void *userData + xmlValidityErrorFunc error + xmlValidityWarningFunc warning + + cdef xmlValidCtxt* xmlNewValidCtxt() + cdef void xmlFreeValidCtxt(xmlValidCtxt* cur) + + cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd) + cdef tree.xmlElement* xmlGetDtdElementDesc( + xmlDtd* dtd, tree.const_xmlChar* name) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/etree_defs.h b/.venv/lib/python3.12/site-packages/lxml/includes/etree_defs.h new file mode 100644 index 00000000..17d470d0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/etree_defs.h @@ -0,0 +1,379 @@ +#ifndef HAS_ETREE_DEFS_H +#define HAS_ETREE_DEFS_H + +/* quick check for Python/libxml2/libxslt devel setup */ +#include "Python.h" +#ifndef PY_VERSION_HEX +# error the development package of Python (header files etc.) is not installed correctly +#elif PY_VERSION_HEX < 0x03060000 +# error this version of lxml requires Python 3.6 or later +#endif + +#include "libxml/xmlversion.h" +#ifndef LIBXML_VERSION +# error the development package of libxml2 (header files etc.) is not installed correctly +#elif LIBXML_VERSION < 20700 +# error minimum required version of libxml2 is 2.7.0 +#endif + +#include "libxslt/xsltconfig.h" +#ifndef LIBXSLT_VERSION +# error the development package of libxslt (header files etc.) is not installed correctly +#elif LIBXSLT_VERSION < 10123 +# error minimum required version of libxslt is 1.1.23 +#endif + + +/* v_arg functions */ +#define va_int(ap) va_arg(ap, int) +#define va_charptr(ap) va_arg(ap, char *) + +#ifdef PYPY_VERSION +# define IS_PYPY 1 +#else +# define IS_PYPY 0 +#endif + +/* unused */ +#define IS_PYTHON2 0 +#define IS_PYTHON3 1 +#undef LXML_UNICODE_STRINGS +#define LXML_UNICODE_STRINGS 1 + +#if !IS_PYPY +# define PyWeakref_LockObject(obj) (NULL) +#endif + +/* Threading is not currently supported by PyPy */ +#if IS_PYPY +# ifndef WITHOUT_THREADING +# define WITHOUT_THREADING +# endif +#endif + +#if IS_PYPY +# ifndef PyUnicode_FromFormat +# define PyUnicode_FromFormat PyString_FromFormat +# endif +# if !defined(PyBytes_FromFormat) +# ifdef PyString_FromFormat +# define PyBytes_FromFormat PyString_FromFormat +# else +#include <stdarg.h> +static PyObject* PyBytes_FromFormat(const char* format, ...) { + PyObject *string; + va_list vargs; +#ifdef HAVE_STDARG_PROTOTYPES + va_start(vargs, format); +#else + va_start(vargs); +#endif + string = PyUnicode_FromFormatV(format, vargs); + va_end(vargs); + if (string && PyUnicode_Check(string)) { + PyObject *bstring = PyUnicode_AsUTF8String(string); + Py_DECREF(string); + string = bstring; + } + if (string && !PyBytes_CheckExact(string)) { + Py_DECREF(string); + string = NULL; + PyErr_SetString(PyExc_TypeError, "String formatting and encoding failed to return bytes object"); + } + return string; +} +# endif +# endif +#endif + +#if PY_VERSION_HEX >= 0x030B00A1 +/* Python 3.12 doesn't have wstr Unicode strings any more. */ +#undef PyUnicode_GET_DATA_SIZE +#define PyUnicode_GET_DATA_SIZE(ustr) (0) +#undef PyUnicode_AS_DATA +#define PyUnicode_AS_DATA(ustr) (NULL) +#undef PyUnicode_IS_READY +#define PyUnicode_IS_READY(ustr) (1) +#endif + +#ifdef WITHOUT_THREADING +# undef PyEval_SaveThread +# define PyEval_SaveThread() (NULL) +# undef PyEval_RestoreThread +# define PyEval_RestoreThread(state) if (state); else {} +# undef PyGILState_Ensure +# define PyGILState_Ensure() (PyGILState_UNLOCKED) +# undef PyGILState_Release +# define PyGILState_Release(state) if (state); else {} +# undef Py_UNBLOCK_THREADS +# define Py_UNBLOCK_THREADS _save = NULL; +# undef Py_BLOCK_THREADS +# define Py_BLOCK_THREADS if (_save); else {} +#endif + +#ifdef WITHOUT_THREADING +# define ENABLE_THREADING 0 +#else +# define ENABLE_THREADING 1 +#endif + +#if LIBXML_VERSION < 20704 +/* FIXME: hack to make new error reporting compile in old libxml2 versions */ +# define xmlStructuredErrorContext NULL +# define xmlXIncludeProcessTreeFlagsData(n,o,d) xmlXIncludeProcessTreeFlags(n,o) +#endif + +/* schematron was added in libxml2 2.6.21 */ +#ifdef LIBXML_SCHEMATRON_ENABLED +# define ENABLE_SCHEMATRON 1 +#else +# define ENABLE_SCHEMATRON 0 +# define XML_SCHEMATRON_OUT_QUIET 0 +# define XML_SCHEMATRON_OUT_XML 0 +# define XML_SCHEMATRON_OUT_ERROR 0 + typedef void xmlSchematron; + typedef void xmlSchematronParserCtxt; + typedef void xmlSchematronValidCtxt; +# define xmlSchematronNewDocParserCtxt(doc) NULL +# define xmlSchematronNewParserCtxt(file) NULL +# define xmlSchematronParse(ctxt) NULL +# define xmlSchematronFreeParserCtxt(ctxt) +# define xmlSchematronFree(schema) +# define xmlSchematronNewValidCtxt(schema, options) NULL +# define xmlSchematronValidateDoc(ctxt, doc) 0 +# define xmlSchematronFreeValidCtxt(ctxt) +# define xmlSchematronSetValidStructuredErrors(ctxt, errorfunc, data) +#endif + +#if LIBXML_VERSION < 20708 +# define HTML_PARSE_NODEFDTD 4 +#endif +#if LIBXML_VERSION < 20900 +# define XML_PARSE_BIG_LINES 4194304 +#endif + +#include "libxml/tree.h" +#ifndef LIBXML2_NEW_BUFFER + typedef xmlBuffer xmlBuf; +# define xmlBufContent(buf) xmlBufferContent(buf) +# define xmlBufUse(buf) xmlBufferLength(buf) +#endif + +/* libexslt 1.1.25+ support EXSLT functions in XPath */ +#if LIBXSLT_VERSION < 10125 +#define exsltDateXpathCtxtRegister(ctxt, prefix) +#define exsltSetsXpathCtxtRegister(ctxt, prefix) +#define exsltMathXpathCtxtRegister(ctxt, prefix) +#define exsltStrXpathCtxtRegister(ctxt, prefix) +#endif + +#define LXML_GET_XSLT_ENCODING(result_var, style) XSLT_GET_IMPORT_PTR(result_var, style, encoding) + +/* work around MSDEV 6.0 */ +#if (_MSC_VER == 1200) && (WINVER < 0x0500) +long _ftol( double ); //defined by VC6 C libs +long _ftol2( double dblSource ) { return _ftol( dblSource ); } +#endif + +#ifdef __GNUC__ +/* Test for GCC > 2.95 */ +#if __GNUC__ > 2 || (__GNUC__ == 2 && (__GNUC_MINOR__ > 95)) +#define unlikely_condition(x) __builtin_expect((x), 0) +#else /* __GNUC__ > 2 ... */ +#define unlikely_condition(x) (x) +#endif /* __GNUC__ > 2 ... */ +#else /* __GNUC__ */ +#define unlikely_condition(x) (x) +#endif /* __GNUC__ */ + +#ifndef Py_TYPE + #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) +#endif + +#define PY_NEW(T) \ + (((PyTypeObject*)(T))->tp_new( \ + (PyTypeObject*)(T), __pyx_empty_tuple, NULL)) + +#define _fqtypename(o) ((Py_TYPE(o))->tp_name) + +#define lxml_malloc(count, item_size) \ + (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / item_size)) ? NULL : \ + (PyMem_Malloc((count) * item_size))) + +#define lxml_realloc(mem, count, item_size) \ + (unlikely_condition((size_t)(count) > (size_t) (PY_SSIZE_T_MAX / item_size)) ? NULL : \ + (PyMem_Realloc(mem, (count) * item_size))) + +#define lxml_free(mem) PyMem_Free(mem) + +#define _isString(obj) (PyUnicode_Check(obj) || PyBytes_Check(obj)) + +#define _isElement(c_node) \ + (((c_node)->type == XML_ELEMENT_NODE) || \ + ((c_node)->type == XML_COMMENT_NODE) || \ + ((c_node)->type == XML_ENTITY_REF_NODE) || \ + ((c_node)->type == XML_PI_NODE)) + +#define _isElementOrXInclude(c_node) \ + (_isElement(c_node) || \ + ((c_node)->type == XML_XINCLUDE_START) || \ + ((c_node)->type == XML_XINCLUDE_END)) + +#define _getNs(c_node) \ + (((c_node)->ns == 0) ? 0 : ((c_node)->ns->href)) + + +#include "string.h" +static void* lxml_unpack_xmldoc_capsule(PyObject* capsule, int* is_owned) { + xmlDoc *c_doc; + void *context; + *is_owned = 0; + if (unlikely_condition(!PyCapsule_IsValid(capsule, (const char*)"libxml2:xmlDoc"))) { + PyErr_SetString( + PyExc_TypeError, + "Not a valid capsule. The capsule argument must be a capsule object with name libxml2:xmlDoc"); + return NULL; + } + c_doc = (xmlDoc*) PyCapsule_GetPointer(capsule, (const char*)"libxml2:xmlDoc"); + if (unlikely_condition(!c_doc)) return NULL; + + if (unlikely_condition(c_doc->type != XML_DOCUMENT_NODE && c_doc->type != XML_HTML_DOCUMENT_NODE)) { + PyErr_Format( + PyExc_ValueError, + "Illegal document provided: expected XML or HTML, found %d", (int)c_doc->type); + return NULL; + } + + context = PyCapsule_GetContext(capsule); + if (unlikely_condition(!context && PyErr_Occurred())) return NULL; + if (context && strcmp((const char*) context, "destructor:xmlFreeDoc") == 0) { + /* take ownership by setting destructor to NULL */ + if (PyCapsule_SetDestructor(capsule, NULL) == 0) { + /* ownership transferred => invalidate capsule by clearing its name */ + if (unlikely_condition(PyCapsule_SetName(capsule, NULL))) { + /* this should never happen since everything above succeeded */ + xmlFreeDoc(c_doc); + return NULL; + } + *is_owned = 1; + } + } + return c_doc; +} + +/* Macro pair implementation of a depth first tree walker + * + * Calls the code block between the BEGIN and END macros for all elements + * below c_tree_top (exclusively), starting at c_node (inclusively iff + * 'inclusive' is 1). The _ELEMENT_ variants will only stop on nodes + * that match _isElement(), the normal variant will stop on every node + * except text nodes. + * + * To traverse the node and all of its children and siblings in Pyrex, call + * cdef xmlNode* some_node + * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 1) + * # do something with some_node + * END_FOR_EACH_ELEMENT_FROM(some_node) + * + * To traverse only the children and siblings of a node, call + * cdef xmlNode* some_node + * BEGIN_FOR_EACH_ELEMENT_FROM(some_node.parent, some_node, 0) + * # do something with some_node + * END_FOR_EACH_ELEMENT_FROM(some_node) + * + * To traverse only the children, do: + * cdef xmlNode* some_node + * some_node = parent_node.children + * BEGIN_FOR_EACH_ELEMENT_FROM(parent_node, some_node, 1) + * # do something with some_node + * END_FOR_EACH_ELEMENT_FROM(some_node) + * + * NOTE: 'some_node' MUST be a plain 'xmlNode*' ! + * + * NOTE: parent modification during the walk can divert the iterator, but + * should not segfault ! + */ + +#define _LX__ELEMENT_MATCH(c_node, only_elements) \ + ((only_elements) ? (_isElement(c_node)) : 1) + +#define _LX__ADVANCE_TO_NEXT(c_node, only_elements) \ + while ((c_node != 0) && (!_LX__ELEMENT_MATCH(c_node, only_elements))) \ + c_node = c_node->next; + +#define _LX__TRAVERSE_TO_NEXT(c_stop_node, c_node, only_elements) \ +{ \ + /* walk through children first */ \ + xmlNode* _lx__next = c_node->children; \ + if (_lx__next != 0) { \ + if (c_node->type == XML_ENTITY_REF_NODE || c_node->type == XML_DTD_NODE) { \ + _lx__next = 0; \ + } else { \ + _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ + } \ + } \ + if ((_lx__next == 0) && (c_node != c_stop_node)) { \ + /* try siblings */ \ + _lx__next = c_node->next; \ + _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ + /* back off through parents */ \ + while (_lx__next == 0) { \ + c_node = c_node->parent; \ + if (c_node == 0) \ + break; \ + if (c_node == c_stop_node) \ + break; \ + if ((only_elements) && !_isElement(c_node)) \ + break; \ + /* we already traversed the parents -> siblings */ \ + _lx__next = c_node->next; \ + _LX__ADVANCE_TO_NEXT(_lx__next, only_elements) \ + } \ + } \ + c_node = _lx__next; \ +} + +#define _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, only_elements) \ +{ \ + if (c_node != 0) { \ + const xmlNode* _lx__tree_top = (c_tree_top); \ + const int _lx__only_elements = (only_elements); \ + /* make sure we start at an element */ \ + if (!_LX__ELEMENT_MATCH(c_node, _lx__only_elements)) { \ + /* we skip the node, so 'inclusive' is irrelevant */ \ + if (c_node == _lx__tree_top) \ + c_node = 0; /* nothing to traverse */ \ + else { \ + c_node = c_node->next; \ + _LX__ADVANCE_TO_NEXT(c_node, _lx__only_elements) \ + } \ + } else if (! (inclusive)) { \ + /* skip the first node */ \ + _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \ + } \ + \ + /* now run the user code on the elements we find */ \ + while (c_node != 0) { \ + /* here goes the code to be run for each element */ + +#define _LX__END_FOR_EACH_FROM(c_node) \ + _LX__TRAVERSE_TO_NEXT(_lx__tree_top, c_node, _lx__only_elements) \ + } \ + } \ +} + + +#define BEGIN_FOR_EACH_ELEMENT_FROM(c_tree_top, c_node, inclusive) \ + _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 1) + +#define END_FOR_EACH_ELEMENT_FROM(c_node) \ + _LX__END_FOR_EACH_FROM(c_node) + +#define BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive) \ + _LX__BEGIN_FOR_EACH_FROM(c_tree_top, c_node, inclusive, 0) + +#define END_FOR_EACH_FROM(c_node) \ + _LX__END_FOR_EACH_FROM(c_node) + + +#endif /* HAS_ETREE_DEFS_H */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/etreepublic.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/etreepublic.pxd new file mode 100644 index 00000000..7ef001b1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/etreepublic.pxd @@ -0,0 +1,237 @@ +# public Cython/C interface to lxml.etree + +from lxml.includes cimport tree +from lxml.includes.tree cimport const_xmlChar + +cdef extern from "lxml-version.h": + cdef char* LXML_VERSION_STRING + +cdef extern from "etree_defs.h": + # test if c_node is considered an Element (i.e. Element, Comment, etc.) + cdef bint _isElement(tree.xmlNode* c_node) noexcept nogil + + # return the namespace URI of the node or NULL + cdef const_xmlChar* _getNs(tree.xmlNode* node) noexcept nogil + + # pair of macros for tree traversal + cdef void BEGIN_FOR_EACH_ELEMENT_FROM(tree.xmlNode* tree_top, + tree.xmlNode* start_node, + int start_node_inclusive) noexcept nogil + cdef void END_FOR_EACH_ELEMENT_FROM(tree.xmlNode* start_node) noexcept nogil + +cdef extern from "etree_api.h": + + # first function to call! + cdef int import_lxml__etree() except -1 + + ########################################################################## + # public ElementTree API classes + + cdef class lxml.etree._Document [ object LxmlDocument ]: + cdef tree.xmlDoc* _c_doc + + cdef class lxml.etree._Element [ object LxmlElement ]: + cdef _Document _doc + cdef tree.xmlNode* _c_node + + cdef class lxml.etree.ElementBase(_Element) [ object LxmlElementBase ]: + pass + + cdef class lxml.etree._ElementTree [ object LxmlElementTree ]: + cdef _Document _doc + cdef _Element _context_node + + cdef class lxml.etree.ElementClassLookup [ object LxmlElementClassLookup ]: + cdef object (*_lookup_function)(object, _Document, tree.xmlNode*) + + cdef class lxml.etree.FallbackElementClassLookup(ElementClassLookup) \ + [ object LxmlFallbackElementClassLookup ]: + cdef ElementClassLookup fallback + cdef object (*_fallback_function)(object, _Document, tree.xmlNode*) + + ########################################################################## + # creating Element objects + + # create an Element for a C-node in the Document + cdef _Element elementFactory(_Document doc, tree.xmlNode* c_node) + + # create an ElementTree for an Element + cdef _ElementTree elementTreeFactory(_Element context_node) + + # create an ElementTree subclass for an Element + cdef _ElementTree newElementTree(_Element context_node, object subclass) + + # create an ElementTree from an external document + cdef _ElementTree adoptExternalDocument(tree.xmlDoc* c_doc, parser, bint is_owned) + + # create a new Element for an existing or new document (doc = None) + # builds Python object after setting text, tail, namespaces and attributes + cdef _Element makeElement(tag, _Document doc, parser, + text, tail, attrib, nsmap) + + # create a new SubElement for an existing parent + # builds Python object after setting text, tail, namespaces and attributes + cdef _Element makeSubElement(_Element parent, tag, text, tail, + attrib, nsmap) + + # deep copy a node to include it in the Document + cdef _Element deepcopyNodeToDocument(_Document doc, tree.xmlNode* c_root) + + # set the internal lookup function for Element/Comment/PI classes + # use setElementClassLookupFunction(NULL, None) to reset it + # note that the lookup function *must always* return an _Element subclass! + cdef void setElementClassLookupFunction( + object (*function)(object, _Document, tree.xmlNode*), object state) + + # lookup function that always returns the default Element class + # note that the first argument is expected to be None! + cdef object lookupDefaultElementClass(_1, _Document _2, + tree.xmlNode* c_node) + + # lookup function for namespace/tag specific Element classes + # note that the first argument is expected to be None! + cdef object lookupNamespaceElementClass(_1, _Document _2, + tree.xmlNode* c_node) + + # call the fallback lookup function of a FallbackElementClassLookup + cdef object callLookupFallback(FallbackElementClassLookup lookup, + _Document doc, tree.xmlNode* c_node) + + ########################################################################## + # XML attribute access + + # return an attribute value for a C attribute on a C element node + cdef unicode attributeValue(tree.xmlNode* c_element, + tree.xmlAttr* c_attrib_node) + + # return the value of the attribute with 'ns' and 'name' (or None) + cdef unicode attributeValueFromNsName(tree.xmlNode* c_element, + const_xmlChar* c_ns, const_xmlChar* c_name) + + # return the value of attribute "{ns}name", or the default value + cdef object getAttributeValue(_Element element, key, default) + + # return an iterator over attribute names (1), values (2) or items (3) + # attributes must not be removed during iteration! + cdef object iterattributes(_Element element, int keysvalues) + + # return the list of all attribute names (1), values (2) or items (3) + cdef list collectAttributes(tree.xmlNode* c_element, int keysvalues) + + # set an attribute value on an element + # on failure, sets an exception and returns -1 + cdef int setAttributeValue(_Element element, key, value) except -1 + + # delete an attribute + # on failure, sets an exception and returns -1 + cdef int delAttribute(_Element element, key) except -1 + + # delete an attribute based on name and namespace URI + # returns -1 if the attribute was not found (no exception) + cdef int delAttributeFromNsName(tree.xmlNode* c_element, + const_xmlChar* c_href, const_xmlChar* c_name) noexcept + + ########################################################################## + # XML node helper functions + + # check if the element has at least one child + cdef bint hasChild(tree.xmlNode* c_node) noexcept nogil + + # find child element number 'index' (supports negative indexes) + cdef tree.xmlNode* findChild(tree.xmlNode* c_node, + Py_ssize_t index) noexcept nogil + + # find child element number 'index' starting at first one + cdef tree.xmlNode* findChildForwards(tree.xmlNode* c_node, + Py_ssize_t index) nogil + + # find child element number 'index' starting at last one + cdef tree.xmlNode* findChildBackwards(tree.xmlNode* c_node, + Py_ssize_t index) nogil + + # return next/previous sibling element of the node + cdef tree.xmlNode* nextElement(tree.xmlNode* c_node) nogil + cdef tree.xmlNode* previousElement(tree.xmlNode* c_node) nogil + + ########################################################################## + # iterators (DEPRECATED API, don't use in new code!) + + cdef class lxml.etree._ElementTagMatcher [ object LxmlElementTagMatcher ]: + cdef char* _href + cdef char* _name + + # store "{ns}tag" (or None) filter for this matcher or element iterator + # ** unless _href *and* _name are set up 'by hand', this function *must* + # ** be called when subclassing the iterator below! + cdef void initTagMatch(_ElementTagMatcher matcher, tag) + + cdef class lxml.etree._ElementIterator(_ElementTagMatcher) [ + object LxmlElementIterator ]: + cdef _Element _node + cdef tree.xmlNode* (*_next_element)(tree.xmlNode*) + + # store the initial node of the iterator if it matches the required tag + # or its next matching sibling if not + cdef void iteratorStoreNext(_ElementIterator iterator, _Element node) + + ########################################################################## + # other helper functions + + # check if a C node matches a tag name and namespace + # (NULL allowed for each => always matches) + cdef int tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name) + + # convert a UTF-8 char* to a Python unicode string + cdef unicode pyunicode(const_xmlChar* s) + + # convert the string to UTF-8 using the normal lxml.etree semantics + cdef bytes utf8(object s) + + # split a tag into a (URI, name) tuple, return None as URI for '{}tag' + cdef tuple getNsTag(object tag) + + # split a tag into a (URI, name) tuple, return b'' as URI for '{}tag' + cdef tuple getNsTagWithEmptyNs(object tag) + + # get the "{ns}tag" string for a C node + cdef unicode namespacedName(tree.xmlNode* c_node) + + # get the "{ns}tag" string for a href/tagname pair (c_ns may be NULL) + cdef unicode namespacedNameFromNsName(const_xmlChar* c_ns, const_xmlChar* c_tag) + + # check if the node has a text value (which may be '') + cdef bint hasText(tree.xmlNode* c_node) nogil + + # check if the node has a tail value (which may be '') + cdef bint hasTail(tree.xmlNode* c_node) nogil + + # get the text content of an element (or None) + cdef unicode textOf(tree.xmlNode* c_node) + + # get the tail content of an element (or None) + cdef unicode tailOf(tree.xmlNode* c_node) + + # set the text value of an element + cdef int setNodeText(tree.xmlNode* c_node, text) except -1 + + # set the tail text value of an element + cdef int setTailText(tree.xmlNode* c_node, text) except -1 + + # append an element to the children of a parent element + # deprecated: don't use, does not propagate exceptions! + # use appendChildToElement() instead + cdef void appendChild(_Element parent, _Element child) + + # added in lxml 3.3 as a safe replacement for appendChild() + # return -1 for exception, 0 for ok + cdef int appendChildToElement(_Element parent, _Element child) except -1 + + # recursively lookup a namespace in element or ancestors, or create it + cdef tree.xmlNs* findOrBuildNodeNsPrefix( + _Document doc, tree.xmlNode* c_node, const_xmlChar* href, const_xmlChar* prefix) + + # find the Document of an Element, ElementTree or Document (itself!) + cdef _Document documentOrRaise(object input) + + # find the root Element of an Element (itself!), ElementTree or Document + cdef _Element rootNodeOrRaise(object input) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/__init__.py b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/__init__.py diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/libcharset.h b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/libcharset.h new file mode 100644 index 00000000..fcf22748 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/libcharset.h @@ -0,0 +1,45 @@ +/* Copyright (C) 2003 Free Software Foundation, Inc. + This file is part of the GNU CHARSET Library. + + The GNU CHARSET Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + The GNU CHARSET Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU CHARSET Library; see the file COPYING.LIB. If not, + see <https://www.gnu.org/licenses/>. */ + +#ifndef _LIBCHARSET_H +#define _LIBCHARSET_H + +#include <localcharset.h> + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Support for relocatable packages. */ + +/* Sets the original and the current installation prefix of the package. + Relocation simply replaces a pathname starting with the original prefix + by the corresponding pathname with the current prefix instead. Both + prefixes should be directory names without trailing slash (i.e. use "" + instead of "/"). */ +extern void libcharset_set_relocation_prefix (const char *orig_prefix, + const char *curr_prefix); + + +#ifdef __cplusplus +} +#endif + + +#endif /* _LIBCHARSET_H */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/localcharset.h b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/localcharset.h new file mode 100644 index 00000000..34ce0add --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/localcharset.h @@ -0,0 +1,137 @@ +/* Determine a canonical name for the current locale's character encoding. + Copyright (C) 2000-2003, 2009-2019 Free Software Foundation, Inc. + This file is part of the GNU CHARSET Library. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU Lesser General Public License as published + by the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program; if not, see <https://www.gnu.org/licenses/>. */ + +#ifndef _LOCALCHARSET_H +#define _LOCALCHARSET_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +/* Determine the current locale's character encoding, and canonicalize it + into one of the canonical names listed below. + The result must not be freed; it is statically allocated. The result + becomes invalid when setlocale() is used to change the global locale, or + when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG + is changed; threads in multithreaded programs should not do this. + If the canonical name cannot be determined, the result is a non-canonical + name. */ +extern const char * locale_charset (void); + +/* About GNU canonical names for character encodings: + + Every canonical name must be supported by GNU libiconv. Support by GNU libc + is also desirable. + + The name is case insensitive. Usually an upper case MIME charset name is + preferred. + + The current list of these GNU canonical names is: + + name MIME? used by which systems + (darwin = Mac OS X, windows = native Windows) + + ASCII, ANSI_X3.4-1968 glibc solaris freebsd netbsd darwin minix cygwin + ISO-8859-1 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-2 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-3 Y glibc solaris cygwin + ISO-8859-4 Y hpux osf solaris freebsd netbsd openbsd darwin + ISO-8859-5 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-6 Y glibc aix hpux solaris cygwin + ISO-8859-7 Y glibc aix hpux irix osf solaris freebsd netbsd openbsd darwin cygwin zos + ISO-8859-8 Y glibc aix hpux osf solaris cygwin zos + ISO-8859-9 Y glibc aix hpux irix osf solaris freebsd darwin cygwin zos + ISO-8859-13 glibc hpux solaris freebsd netbsd openbsd darwin cygwin + ISO-8859-14 glibc cygwin + ISO-8859-15 glibc aix irix osf solaris freebsd netbsd openbsd darwin cygwin + KOI8-R Y glibc hpux solaris freebsd netbsd openbsd darwin + KOI8-U Y glibc freebsd netbsd openbsd darwin cygwin + KOI8-T glibc + CP437 dos + CP775 dos + CP850 aix osf dos + CP852 dos + CP855 dos + CP856 aix + CP857 dos + CP861 dos + CP862 dos + CP864 dos + CP865 dos + CP866 freebsd netbsd openbsd darwin dos + CP869 dos + CP874 windows dos + CP922 aix + CP932 aix cygwin windows dos + CP943 aix zos + CP949 osf darwin windows dos + CP950 windows dos + CP1046 aix + CP1124 aix + CP1125 dos + CP1129 aix + CP1131 freebsd darwin + CP1250 windows + CP1251 glibc hpux solaris freebsd netbsd openbsd darwin cygwin windows + CP1252 aix windows + CP1253 windows + CP1254 windows + CP1255 glibc windows + CP1256 windows + CP1257 windows + GB2312 Y glibc aix hpux irix solaris freebsd netbsd darwin cygwin zos + EUC-JP Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin + EUC-KR Y glibc aix hpux irix osf solaris freebsd netbsd darwin cygwin zos + EUC-TW glibc aix hpux irix osf solaris netbsd + BIG5 Y glibc aix hpux osf solaris freebsd netbsd darwin cygwin zos + BIG5-HKSCS glibc hpux solaris netbsd darwin + GBK glibc aix osf solaris freebsd darwin cygwin windows dos + GB18030 glibc hpux solaris freebsd netbsd darwin + SHIFT_JIS Y hpux osf solaris freebsd netbsd darwin + JOHAB glibc solaris windows + TIS-620 glibc aix hpux osf solaris cygwin zos + VISCII Y glibc + TCVN5712-1 glibc + ARMSCII-8 glibc freebsd netbsd darwin + GEORGIAN-PS glibc cygwin + PT154 glibc netbsd cygwin + HP-ROMAN8 hpux + HP-ARABIC8 hpux + HP-GREEK8 hpux + HP-HEBREW8 hpux + HP-TURKISH8 hpux + HP-KANA8 hpux + DEC-KANJI osf + DEC-HANYU osf + UTF-8 Y glibc aix hpux osf solaris netbsd darwin cygwin zos + + Note: Names which are not marked as being a MIME name should not be used in + Internet protocols for information interchange (mail, news, etc.). + + Note: ASCII and ANSI_X3.4-1968 are synonymous canonical names. Applications + must understand both names and treat them as equivalent. + */ + + +#ifdef __cplusplus +} +#endif + + +#endif /* _LOCALCHARSET_H */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/zconf.h b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/zconf.h new file mode 100644 index 00000000..ede3c82e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/zconf.h @@ -0,0 +1,543 @@ +/* zconf.h -- configuration of the zlib compression library + * Copyright (C) 1995-2024 Jean-loup Gailly, Mark Adler + * For conditions of distribution and use, see copyright notice in zlib.h + */ + +/* @(#) $Id$ */ + +#ifndef ZCONF_H +#define ZCONF_H + +/* + * If you *really* need a unique prefix for all types and library functions, + * compile with -DZ_PREFIX. The "standard" zlib should be compiled without it. + * Even better than compiling with -DZ_PREFIX would be to use configure to set + * this permanently in zconf.h using "./configure --zprefix". + */ +#ifdef Z_PREFIX /* may be set to #if 1 by ./configure */ +# define Z_PREFIX_SET + +/* all linked symbols and init macros */ +# define _dist_code z__dist_code +# define _length_code z__length_code +# define _tr_align z__tr_align +# define _tr_flush_bits z__tr_flush_bits +# define _tr_flush_block z__tr_flush_block +# define _tr_init z__tr_init +# define _tr_stored_block z__tr_stored_block +# define _tr_tally z__tr_tally +# define adler32 z_adler32 +# define adler32_combine z_adler32_combine +# define adler32_combine64 z_adler32_combine64 +# define adler32_z z_adler32_z +# ifndef Z_SOLO +# define compress z_compress +# define compress2 z_compress2 +# define compressBound z_compressBound +# endif +# define crc32 z_crc32 +# define crc32_combine z_crc32_combine +# define crc32_combine64 z_crc32_combine64 +# define crc32_combine_gen z_crc32_combine_gen +# define crc32_combine_gen64 z_crc32_combine_gen64 +# define crc32_combine_op z_crc32_combine_op +# define crc32_z z_crc32_z +# define deflate z_deflate +# define deflateBound z_deflateBound +# define deflateCopy z_deflateCopy +# define deflateEnd z_deflateEnd +# define deflateGetDictionary z_deflateGetDictionary +# define deflateInit z_deflateInit +# define deflateInit2 z_deflateInit2 +# define deflateInit2_ z_deflateInit2_ +# define deflateInit_ z_deflateInit_ +# define deflateParams z_deflateParams +# define deflatePending z_deflatePending +# define deflatePrime z_deflatePrime +# define deflateReset z_deflateReset +# define deflateResetKeep z_deflateResetKeep +# define deflateSetDictionary z_deflateSetDictionary +# define deflateSetHeader z_deflateSetHeader +# define deflateTune z_deflateTune +# define deflate_copyright z_deflate_copyright +# define get_crc_table z_get_crc_table +# ifndef Z_SOLO +# define gz_error z_gz_error +# define gz_intmax z_gz_intmax +# define gz_strwinerror z_gz_strwinerror +# define gzbuffer z_gzbuffer +# define gzclearerr z_gzclearerr +# define gzclose z_gzclose +# define gzclose_r z_gzclose_r +# define gzclose_w z_gzclose_w +# define gzdirect z_gzdirect +# define gzdopen z_gzdopen +# define gzeof z_gzeof +# define gzerror z_gzerror +# define gzflush z_gzflush +# define gzfread z_gzfread +# define gzfwrite z_gzfwrite +# define gzgetc z_gzgetc +# define gzgetc_ z_gzgetc_ +# define gzgets z_gzgets +# define gzoffset z_gzoffset +# define gzoffset64 z_gzoffset64 +# define gzopen z_gzopen +# define gzopen64 z_gzopen64 +# ifdef _WIN32 +# define gzopen_w z_gzopen_w +# endif +# define gzprintf z_gzprintf +# define gzputc z_gzputc +# define gzputs z_gzputs +# define gzread z_gzread +# define gzrewind z_gzrewind +# define gzseek z_gzseek +# define gzseek64 z_gzseek64 +# define gzsetparams z_gzsetparams +# define gztell z_gztell +# define gztell64 z_gztell64 +# define gzungetc z_gzungetc +# define gzvprintf z_gzvprintf +# define gzwrite z_gzwrite +# endif +# define inflate z_inflate +# define inflateBack z_inflateBack +# define inflateBackEnd z_inflateBackEnd +# define inflateBackInit z_inflateBackInit +# define inflateBackInit_ z_inflateBackInit_ +# define inflateCodesUsed z_inflateCodesUsed +# define inflateCopy z_inflateCopy +# define inflateEnd z_inflateEnd +# define inflateGetDictionary z_inflateGetDictionary +# define inflateGetHeader z_inflateGetHeader +# define inflateInit z_inflateInit +# define inflateInit2 z_inflateInit2 +# define inflateInit2_ z_inflateInit2_ +# define inflateInit_ z_inflateInit_ +# define inflateMark z_inflateMark +# define inflatePrime z_inflatePrime +# define inflateReset z_inflateReset +# define inflateReset2 z_inflateReset2 +# define inflateResetKeep z_inflateResetKeep +# define inflateSetDictionary z_inflateSetDictionary +# define inflateSync z_inflateSync +# define inflateSyncPoint z_inflateSyncPoint +# define inflateUndermine z_inflateUndermine +# define inflateValidate z_inflateValidate +# define inflate_copyright z_inflate_copyright +# define inflate_fast z_inflate_fast +# define inflate_table z_inflate_table +# ifndef Z_SOLO +# define uncompress z_uncompress +# define uncompress2 z_uncompress2 +# endif +# define zError z_zError +# ifndef Z_SOLO +# define zcalloc z_zcalloc +# define zcfree z_zcfree +# endif +# define zlibCompileFlags z_zlibCompileFlags +# define zlibVersion z_zlibVersion + +/* all zlib typedefs in zlib.h and zconf.h */ +# define Byte z_Byte +# define Bytef z_Bytef +# define alloc_func z_alloc_func +# define charf z_charf +# define free_func z_free_func +# ifndef Z_SOLO +# define gzFile z_gzFile +# endif +# define gz_header z_gz_header +# define gz_headerp z_gz_headerp +# define in_func z_in_func +# define intf z_intf +# define out_func z_out_func +# define uInt z_uInt +# define uIntf z_uIntf +# define uLong z_uLong +# define uLongf z_uLongf +# define voidp z_voidp +# define voidpc z_voidpc +# define voidpf z_voidpf + +/* all zlib structs in zlib.h and zconf.h */ +# define gz_header_s z_gz_header_s +# define internal_state z_internal_state + +#endif + +#if defined(__MSDOS__) && !defined(MSDOS) +# define MSDOS +#endif +#if (defined(OS_2) || defined(__OS2__)) && !defined(OS2) +# define OS2 +#endif +#if defined(_WINDOWS) && !defined(WINDOWS) +# define WINDOWS +#endif +#if defined(_WIN32) || defined(_WIN32_WCE) || defined(__WIN32__) +# ifndef WIN32 +# define WIN32 +# endif +#endif +#if (defined(MSDOS) || defined(OS2) || defined(WINDOWS)) && !defined(WIN32) +# if !defined(__GNUC__) && !defined(__FLAT__) && !defined(__386__) +# ifndef SYS16BIT +# define SYS16BIT +# endif +# endif +#endif + +/* + * Compile with -DMAXSEG_64K if the alloc function cannot allocate more + * than 64k bytes at a time (needed on systems with 16-bit int). + */ +#ifdef SYS16BIT +# define MAXSEG_64K +#endif +#ifdef MSDOS +# define UNALIGNED_OK +#endif + +#ifdef __STDC_VERSION__ +# ifndef STDC +# define STDC +# endif +# if __STDC_VERSION__ >= 199901L +# ifndef STDC99 +# define STDC99 +# endif +# endif +#endif +#if !defined(STDC) && (defined(__STDC__) || defined(__cplusplus)) +# define STDC +#endif +#if !defined(STDC) && (defined(__GNUC__) || defined(__BORLANDC__)) +# define STDC +#endif +#if !defined(STDC) && (defined(MSDOS) || defined(WINDOWS) || defined(WIN32)) +# define STDC +#endif +#if !defined(STDC) && (defined(OS2) || defined(__HOS_AIX__)) +# define STDC +#endif + +#if defined(__OS400__) && !defined(STDC) /* iSeries (formerly AS/400). */ +# define STDC +#endif + +#ifndef STDC +# ifndef const /* cannot use !defined(STDC) && !defined(const) on Mac */ +# define const /* note: need a more gentle solution here */ +# endif +#endif + +#if defined(ZLIB_CONST) && !defined(z_const) +# define z_const const +#else +# define z_const +#endif + +#ifdef Z_SOLO +# ifdef _WIN64 + typedef unsigned long long z_size_t; +# else + typedef unsigned long z_size_t; +# endif +#else +# define z_longlong long long +# if defined(NO_SIZE_T) + typedef unsigned NO_SIZE_T z_size_t; +# elif defined(STDC) +# include <stddef.h> + typedef size_t z_size_t; +# else + typedef unsigned long z_size_t; +# endif +# undef z_longlong +#endif + +/* Maximum value for memLevel in deflateInit2 */ +#ifndef MAX_MEM_LEVEL +# ifdef MAXSEG_64K +# define MAX_MEM_LEVEL 8 +# else +# define MAX_MEM_LEVEL 9 +# endif +#endif + +/* Maximum value for windowBits in deflateInit2 and inflateInit2. + * WARNING: reducing MAX_WBITS makes minigzip unable to extract .gz files + * created by gzip. (Files created by minigzip can still be extracted by + * gzip.) + */ +#ifndef MAX_WBITS +# define MAX_WBITS 15 /* 32K LZ77 window */ +#endif + +/* The memory requirements for deflate are (in bytes): + (1 << (windowBits+2)) + (1 << (memLevel+9)) + that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values) + plus a few kilobytes for small objects. For example, if you want to reduce + the default memory requirements from 256K to 128K, compile with + make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7" + Of course this will generally degrade compression (there's no free lunch). + + The memory requirements for inflate are (in bytes) 1 << windowBits + that is, 32K for windowBits=15 (default value) plus about 7 kilobytes + for small objects. +*/ + + /* Type declarations */ + +#ifndef OF /* function prototypes */ +# ifdef STDC +# define OF(args) args +# else +# define OF(args) () +# endif +#endif + +/* The following definitions for FAR are needed only for MSDOS mixed + * model programming (small or medium model with some far allocations). + * This was tested only with MSC; for other MSDOS compilers you may have + * to define NO_MEMCPY in zutil.h. If you don't need the mixed model, + * just define FAR to be empty. + */ +#ifdef SYS16BIT +# if defined(M_I86SM) || defined(M_I86MM) + /* MSC small or medium model */ +# define SMALL_MEDIUM +# ifdef _MSC_VER +# define FAR _far +# else +# define FAR far +# endif +# endif +# if (defined(__SMALL__) || defined(__MEDIUM__)) + /* Turbo C small or medium model */ +# define SMALL_MEDIUM +# ifdef __BORLANDC__ +# define FAR _far +# else +# define FAR far +# endif +# endif +#endif + +#if defined(WINDOWS) || defined(WIN32) + /* If building or using zlib as a DLL, define ZLIB_DLL. + * This is not mandatory, but it offers a little performance increase. + */ +# ifdef ZLIB_DLL +# if defined(WIN32) && (!defined(__BORLANDC__) || (__BORLANDC__ >= 0x500)) +# ifdef ZLIB_INTERNAL +# define ZEXTERN extern __declspec(dllexport) +# else +# define ZEXTERN extern __declspec(dllimport) +# endif +# endif +# endif /* ZLIB_DLL */ + /* If building or using zlib with the WINAPI/WINAPIV calling convention, + * define ZLIB_WINAPI. + * Caution: the standard ZLIB1.DLL is NOT compiled using ZLIB_WINAPI. + */ +# ifdef ZLIB_WINAPI +# ifdef FAR +# undef FAR +# endif +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include <windows.h> + /* No need for _export, use ZLIB.DEF instead. */ + /* For complete Windows compatibility, use WINAPI, not __stdcall. */ +# define ZEXPORT WINAPI +# ifdef WIN32 +# define ZEXPORTVA WINAPIV +# else +# define ZEXPORTVA FAR CDECL +# endif +# endif +#endif + +#if defined (__BEOS__) +# ifdef ZLIB_DLL +# ifdef ZLIB_INTERNAL +# define ZEXPORT __declspec(dllexport) +# define ZEXPORTVA __declspec(dllexport) +# else +# define ZEXPORT __declspec(dllimport) +# define ZEXPORTVA __declspec(dllimport) +# endif +# endif +#endif + +#ifndef ZEXTERN +# define ZEXTERN extern +#endif +#ifndef ZEXPORT +# define ZEXPORT +#endif +#ifndef ZEXPORTVA +# define ZEXPORTVA +#endif + +#ifndef FAR +# define FAR +#endif + +#if !defined(__MACTYPES__) +typedef unsigned char Byte; /* 8 bits */ +#endif +typedef unsigned int uInt; /* 16 bits or more */ +typedef unsigned long uLong; /* 32 bits or more */ + +#ifdef SMALL_MEDIUM + /* Borland C/C++ and some old MSC versions ignore FAR inside typedef */ +# define Bytef Byte FAR +#else + typedef Byte FAR Bytef; +#endif +typedef char FAR charf; +typedef int FAR intf; +typedef uInt FAR uIntf; +typedef uLong FAR uLongf; + +#ifdef STDC + typedef void const *voidpc; + typedef void FAR *voidpf; + typedef void *voidp; +#else + typedef Byte const *voidpc; + typedef Byte FAR *voidpf; + typedef Byte *voidp; +#endif + +#if !defined(Z_U4) && !defined(Z_SOLO) && defined(STDC) +# include <limits.h> +# if (UINT_MAX == 0xffffffffUL) +# define Z_U4 unsigned +# elif (ULONG_MAX == 0xffffffffUL) +# define Z_U4 unsigned long +# elif (USHRT_MAX == 0xffffffffUL) +# define Z_U4 unsigned short +# endif +#endif + +#ifdef Z_U4 + typedef Z_U4 z_crc_t; +#else + typedef unsigned long z_crc_t; +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_UNISTD_H +#endif + +#if 1 /* was set to #if 1 by ./configure */ +# define Z_HAVE_STDARG_H +#endif + +#ifdef STDC +# ifndef Z_SOLO +# include <sys/types.h> /* for off_t */ +# endif +#endif + +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +# include <stdarg.h> /* for va_list */ +# endif +#endif + +#ifdef _WIN32 +# ifndef Z_SOLO +# include <stddef.h> /* for wchar_t */ +# endif +#endif + +/* a little trick to accommodate both "#define _LARGEFILE64_SOURCE" and + * "#define _LARGEFILE64_SOURCE 1" as requesting 64-bit operations, (even + * though the former does not conform to the LFS document), but considering + * both "#undef _LARGEFILE64_SOURCE" and "#define _LARGEFILE64_SOURCE 0" as + * equivalently requesting no 64-bit operations + */ +#if defined(_LARGEFILE64_SOURCE) && -_LARGEFILE64_SOURCE - -1 == 1 +# undef _LARGEFILE64_SOURCE +#endif + +#ifndef Z_HAVE_UNISTD_H +# ifdef __WATCOMC__ +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_HAVE_UNISTD_H +# if defined(_LARGEFILE64_SOURCE) && !defined(_WIN32) +# define Z_HAVE_UNISTD_H +# endif +#endif +#ifndef Z_SOLO +# if defined(Z_HAVE_UNISTD_H) +# include <unistd.h> /* for SEEK_*, off_t, and _LFS64_LARGEFILE */ +# ifdef VMS +# include <unixio.h> /* for off_t */ +# endif +# ifndef z_off_t +# define z_off_t off_t +# endif +# endif +#endif + +#if defined(_LFS64_LARGEFILE) && _LFS64_LARGEFILE-0 +# define Z_LFS64 +#endif + +#if defined(_LARGEFILE64_SOURCE) && defined(Z_LFS64) +# define Z_LARGE64 +#endif + +#if defined(_FILE_OFFSET_BITS) && _FILE_OFFSET_BITS-0 == 64 && defined(Z_LFS64) +# define Z_WANT64 +#endif + +#if !defined(SEEK_SET) && !defined(Z_SOLO) +# define SEEK_SET 0 /* Seek from beginning of file. */ +# define SEEK_CUR 1 /* Seek from current position. */ +# define SEEK_END 2 /* Set file pointer to EOF plus "offset" */ +#endif + +#ifndef z_off_t +# define z_off_t long +#endif + +#if !defined(_WIN32) && defined(Z_LARGE64) +# define z_off64_t off64_t +#else +# if defined(_WIN32) && !defined(__GNUC__) +# define z_off64_t __int64 +# else +# define z_off64_t z_off_t +# endif +#endif + +/* MVS linker does not support external names larger than 8 bytes */ +#if defined(__MVS__) + #pragma map(deflateInit_,"DEIN") + #pragma map(deflateInit2_,"DEIN2") + #pragma map(deflateEnd,"DEEND") + #pragma map(deflateBound,"DEBND") + #pragma map(inflateInit_,"ININ") + #pragma map(inflateInit2_,"ININ2") + #pragma map(inflateEnd,"INEND") + #pragma map(inflateSync,"INSY") + #pragma map(inflateSetDictionary,"INSEDI") + #pragma map(compressBound,"CMBND") + #pragma map(inflate_table,"INTABL") + #pragma map(inflate_fast,"INFA") + #pragma map(inflate_copyright,"INCOPY") +#endif + +#endif /* ZCONF_H */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/zlib.h b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/zlib.h new file mode 100644 index 00000000..8d4b932e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/extlibs/zlib.h @@ -0,0 +1,1938 @@ +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.3.1, January 22nd, 2024 + + Copyright (C) 1995-2024 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + + + The data format used by the zlib library is described by RFCs (Request for + Comments) 1950 to 1952 in the files http://tools.ietf.org/html/rfc1950 + (zlib format), rfc1951 (deflate format) and rfc1952 (gzip format). +*/ + +#ifndef ZLIB_H +#define ZLIB_H + +#include "zconf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define ZLIB_VERSION "1.3.1" +#define ZLIB_VERNUM 0x1310 +#define ZLIB_VER_MAJOR 1 +#define ZLIB_VER_MINOR 3 +#define ZLIB_VER_REVISION 1 +#define ZLIB_VER_SUBREVISION 0 + +/* + The 'zlib' compression library provides in-memory compression and + decompression functions, including integrity checks of the uncompressed data. + This version of the library supports only one compression method (deflation) + but other algorithms will be added later and will have the same stream + interface. + + Compression can be done in a single step if the buffers are large enough, + or can be done by repeated calls of the compression function. In the latter + case, the application must provide more input and/or consume the output + (providing more output space) before each call. + + The compressed data format used by default by the in-memory functions is + the zlib format, which is a zlib wrapper documented in RFC 1950, wrapped + around a deflate stream, which is itself documented in RFC 1951. + + The library also supports reading and writing files in gzip (.gz) format + with an interface similar to that of stdio using the functions that start + with "gz". The gzip format is different from the zlib format. gzip is a + gzip wrapper, documented in RFC 1952, wrapped around a deflate stream. + + This library can optionally read and write gzip and raw deflate streams in + memory as well. + + The zlib format was designed to be compact and fast for use in memory + and on communications channels. The gzip format was designed for single- + file compression on file systems, has a larger header than zlib to maintain + directory information, and uses a different, slower check method than zlib. + + The library does not install any signal handler. The decoder checks + the consistency of the compressed data, so the library should never crash + even in the case of corrupted input. +*/ + +typedef voidpf (*alloc_func)(voidpf opaque, uInt items, uInt size); +typedef void (*free_func)(voidpf opaque, voidpf address); + +struct internal_state; + +typedef struct z_stream_s { + z_const Bytef *next_in; /* next input byte */ + uInt avail_in; /* number of bytes available at next_in */ + uLong total_in; /* total number of input bytes read so far */ + + Bytef *next_out; /* next output byte will go here */ + uInt avail_out; /* remaining free space at next_out */ + uLong total_out; /* total number of bytes output so far */ + + z_const char *msg; /* last error message, NULL if no error */ + struct internal_state FAR *state; /* not visible by applications */ + + alloc_func zalloc; /* used to allocate the internal state */ + free_func zfree; /* used to free the internal state */ + voidpf opaque; /* private data object passed to zalloc and zfree */ + + int data_type; /* best guess about the data type: binary or text + for deflate, or the decoding state for inflate */ + uLong adler; /* Adler-32 or CRC-32 value of the uncompressed data */ + uLong reserved; /* reserved for future use */ +} z_stream; + +typedef z_stream FAR *z_streamp; + +/* + gzip header information passed to and from zlib routines. See RFC 1952 + for more details on the meanings of these fields. +*/ +typedef struct gz_header_s { + int text; /* true if compressed data believed to be text */ + uLong time; /* modification time */ + int xflags; /* extra flags (not used when writing a gzip file) */ + int os; /* operating system */ + Bytef *extra; /* pointer to extra field or Z_NULL if none */ + uInt extra_len; /* extra field length (valid if extra != Z_NULL) */ + uInt extra_max; /* space at extra (only when reading header) */ + Bytef *name; /* pointer to zero-terminated file name or Z_NULL */ + uInt name_max; /* space at name (only when reading header) */ + Bytef *comment; /* pointer to zero-terminated comment or Z_NULL */ + uInt comm_max; /* space at comment (only when reading header) */ + int hcrc; /* true if there was or will be a header crc */ + int done; /* true when done reading gzip header (not used + when writing a gzip file) */ +} gz_header; + +typedef gz_header FAR *gz_headerp; + +/* + The application must update next_in and avail_in when avail_in has dropped + to zero. It must update next_out and avail_out when avail_out has dropped + to zero. The application must initialize zalloc, zfree and opaque before + calling the init function. All other fields are set by the compression + library and must not be updated by the application. + + The opaque value provided by the application will be passed as the first + parameter for calls of zalloc and zfree. This can be useful for custom + memory management. The compression library attaches no meaning to the + opaque value. + + zalloc must return Z_NULL if there is not enough memory for the object. + If zlib is used in a multi-threaded application, zalloc and zfree must be + thread safe. In that case, zlib is thread-safe. When zalloc and zfree are + Z_NULL on entry to the initialization function, they are set to internal + routines that use the standard library functions malloc() and free(). + + On 16-bit systems, the functions zalloc and zfree must be able to allocate + exactly 65536 bytes, but will not be required to allocate more than this if + the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS, pointers + returned by zalloc for objects of exactly 65536 bytes *must* have their + offset normalized to zero. The default allocation function provided by this + library ensures this (see zutil.c). To reduce memory requirements and avoid + any allocation of 64K objects, at the expense of compression ratio, compile + the library with -DMAX_WBITS=14 (see zconf.h). + + The fields total_in and total_out can be used for statistics or progress + reports. After compression, total_in holds the total size of the + uncompressed data and may be saved for use by the decompressor (particularly + if the decompressor wants to decompress everything in a single step). +*/ + + /* constants */ + +#define Z_NO_FLUSH 0 +#define Z_PARTIAL_FLUSH 1 +#define Z_SYNC_FLUSH 2 +#define Z_FULL_FLUSH 3 +#define Z_FINISH 4 +#define Z_BLOCK 5 +#define Z_TREES 6 +/* Allowed flush values; see deflate() and inflate() below for details */ + +#define Z_OK 0 +#define Z_STREAM_END 1 +#define Z_NEED_DICT 2 +#define Z_ERRNO (-1) +#define Z_STREAM_ERROR (-2) +#define Z_DATA_ERROR (-3) +#define Z_MEM_ERROR (-4) +#define Z_BUF_ERROR (-5) +#define Z_VERSION_ERROR (-6) +/* Return codes for the compression/decompression functions. Negative values + * are errors, positive values are used for special but normal events. + */ + +#define Z_NO_COMPRESSION 0 +#define Z_BEST_SPEED 1 +#define Z_BEST_COMPRESSION 9 +#define Z_DEFAULT_COMPRESSION (-1) +/* compression levels */ + +#define Z_FILTERED 1 +#define Z_HUFFMAN_ONLY 2 +#define Z_RLE 3 +#define Z_FIXED 4 +#define Z_DEFAULT_STRATEGY 0 +/* compression strategy; see deflateInit2() below for details */ + +#define Z_BINARY 0 +#define Z_TEXT 1 +#define Z_ASCII Z_TEXT /* for compatibility with 1.2.2 and earlier */ +#define Z_UNKNOWN 2 +/* Possible values of the data_type field for deflate() */ + +#define Z_DEFLATED 8 +/* The deflate compression method (the only one supported in this version) */ + +#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */ + +#define zlib_version zlibVersion() +/* for compatibility with versions < 1.0.2 */ + + + /* basic functions */ + +ZEXTERN const char * ZEXPORT zlibVersion(void); +/* The application can compare zlibVersion and ZLIB_VERSION for consistency. + If the first character differs, the library code actually used is not + compatible with the zlib.h header file used by the application. This check + is automatically made by deflateInit and inflateInit. + */ + +/* +ZEXTERN int ZEXPORT deflateInit(z_streamp strm, int level); + + Initializes the internal stream state for compression. The fields + zalloc, zfree and opaque must be initialized before by the caller. If + zalloc and zfree are set to Z_NULL, deflateInit updates them to use default + allocation functions. total_in, total_out, adler, and msg are initialized. + + The compression level must be Z_DEFAULT_COMPRESSION, or between 0 and 9: + 1 gives best speed, 9 gives best compression, 0 gives no compression at all + (the input data is simply copied a block at a time). Z_DEFAULT_COMPRESSION + requests a default compromise between speed and compression (currently + equivalent to level 6). + + deflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if level is not a valid compression level, or + Z_VERSION_ERROR if the zlib library version (zlib_version) is incompatible + with the version assumed by the caller (ZLIB_VERSION). msg is set to null + if there is no error message. deflateInit does not perform any compression: + this will be done by deflate(). +*/ + + +ZEXTERN int ZEXPORT deflate(z_streamp strm, int flush); +/* + deflate compresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. deflate performs one or both of the + following actions: + + - Compress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), next_in and avail_in are updated and + processing will resume at this point for the next call of deflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. This action is forced if the parameter flush is non zero. + Forcing flush frequently degrades the compression ratio, so this parameter + should be set only when necessary. Some output may be provided even if + flush is zero. + + Before the call of deflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating avail_in or avail_out accordingly; avail_out should + never be zero before the call. The application can consume the compressed + output when it wants, for example when the output buffer is full (avail_out + == 0), or after each call of deflate(). If deflate returns Z_OK and with + zero avail_out, it must be called again after making room in the output + buffer because there might be more output pending. See deflatePending(), + which can be used if desired to determine whether or not there is more output + in that case. + + Normally the parameter flush is set to Z_NO_FLUSH, which allows deflate to + decide how much data to accumulate before producing output, in order to + maximize compression. + + If the parameter flush is set to Z_SYNC_FLUSH, all pending output is + flushed to the output buffer and the output is aligned on a byte boundary, so + that the decompressor can get all input data available so far. (In + particular avail_in is zero after the call if enough output space has been + provided before the call.) Flushing may degrade compression for some + compression algorithms and so it should be used only when necessary. This + completes the current deflate block and follows it with an empty stored block + that is three bits plus filler bits to the next byte, followed by four bytes + (00 00 ff ff). + + If flush is set to Z_PARTIAL_FLUSH, all pending output is flushed to the + output buffer, but the output is not aligned to a byte boundary. All of the + input data so far will be available to the decompressor, as for Z_SYNC_FLUSH. + This completes the current deflate block and follows it with an empty fixed + codes block that is 10 bits long. This assures that enough bytes are output + in order for the decompressor to finish the block before the empty fixed + codes block. + + If flush is set to Z_BLOCK, a deflate block is completed and emitted, as + for Z_SYNC_FLUSH, but the output is not aligned on a byte boundary, and up to + seven bits of the current block are held to be written as the next byte after + the next deflate block is completed. In this case, the decompressor may not + be provided enough bits at this point in order to complete decompression of + the data provided so far to the compressor. It may need to wait for the next + block to be emitted. This is for advanced applications that need to control + the emission of deflate blocks. + + If flush is set to Z_FULL_FLUSH, all output is flushed as with + Z_SYNC_FLUSH, and the compression state is reset so that decompression can + restart from this point if previous compressed data has been damaged or if + random access is desired. Using Z_FULL_FLUSH too often can seriously degrade + compression. + + If deflate returns with avail_out == 0, this function must be called again + with the same value of the flush parameter and more output space (updated + avail_out), until the flush is complete (deflate returns with non-zero + avail_out). In the case of a Z_FULL_FLUSH or Z_SYNC_FLUSH, make sure that + avail_out is greater than six when the flush marker begins, in order to avoid + repeated flush markers upon calling deflate() again when avail_out == 0. + + If the parameter flush is set to Z_FINISH, pending input is processed, + pending output is flushed and deflate returns with Z_STREAM_END if there was + enough output space. If deflate returns with Z_OK or Z_BUF_ERROR, this + function must be called again with Z_FINISH and more output space (updated + avail_out) but no more input data, until it returns with Z_STREAM_END or an + error. After deflate has returned Z_STREAM_END, the only possible operations + on the stream are deflateReset or deflateEnd. + + Z_FINISH can be used in the first deflate call after deflateInit if all the + compression is to be done in a single step. In order to complete in one + call, avail_out must be at least the value returned by deflateBound (see + below). Then deflate is guaranteed to return Z_STREAM_END. If not enough + output space is provided, deflate will not return Z_STREAM_END, and it must + be called again as described above. + + deflate() sets strm->adler to the Adler-32 checksum of all input read + so far (that is, total_in bytes). If a gzip stream is being generated, then + strm->adler will be the CRC-32 checksum of the input read so far. (See + deflateInit2 below.) + + deflate() may update strm->data_type if it can make a good guess about + the input data type (Z_BINARY or Z_TEXT). If in doubt, the data is + considered binary. This field is only for information purposes and does not + affect the compression algorithm in any manner. + + deflate() returns Z_OK if some progress has been made (more input + processed or more output produced), Z_STREAM_END if all input has been + consumed and all output has been produced (only when flush is set to + Z_FINISH), Z_STREAM_ERROR if the stream state was inconsistent (for example + if next_in or next_out was Z_NULL or the state was inadvertently written over + by the application), or Z_BUF_ERROR if no progress is possible (for example + avail_in or avail_out was zero). Note that Z_BUF_ERROR is not fatal, and + deflate() can be called again with more input and more output space to + continue compressing. +*/ + + +ZEXTERN int ZEXPORT deflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + deflateEnd returns Z_OK if success, Z_STREAM_ERROR if the + stream state was inconsistent, Z_DATA_ERROR if the stream was freed + prematurely (some input or output was discarded). In the error case, msg + may be set but then points to a static string (which must not be + deallocated). +*/ + + +/* +ZEXTERN int ZEXPORT inflateInit(z_streamp strm); + + Initializes the internal stream state for decompression. The fields + next_in, avail_in, zalloc, zfree and opaque must be initialized before by + the caller. In the current version of inflate, the provided input is not + read or consumed. The allocation of a sliding window will be deferred to + the first call of inflate (if the decompression does not complete on the + first call). If zalloc and zfree are set to Z_NULL, inflateInit updates + them to use default allocation functions. total_in, total_out, adler, and + msg are initialized. + + inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit does not perform any decompression. + Actual decompression will be done by inflate(). So next_in, and avail_in, + next_out, and avail_out are unused and unchanged. The current + implementation of inflateInit() does not process any header information -- + that is deferred until inflate() is called. +*/ + + +ZEXTERN int ZEXPORT inflate(z_streamp strm, int flush); +/* + inflate decompresses as much data as possible, and stops when the input + buffer becomes empty or the output buffer becomes full. It may introduce + some output latency (reading input without producing any output) except when + forced to flush. + + The detailed semantics are as follows. inflate performs one or both of the + following actions: + + - Decompress more input starting at next_in and update next_in and avail_in + accordingly. If not all input can be processed (because there is not + enough room in the output buffer), then next_in and avail_in are updated + accordingly, and processing will resume at this point for the next call of + inflate(). + + - Generate more output starting at next_out and update next_out and avail_out + accordingly. inflate() provides as much output as possible, until there is + no more input data or no more space in the output buffer (see below about + the flush parameter). + + Before the call of inflate(), the application should ensure that at least + one of the actions is possible, by providing more input and/or consuming more + output, and updating the next_* and avail_* values accordingly. If the + caller of inflate() does not provide both available input and available + output space, it is possible that there will be no progress made. The + application can consume the uncompressed output when it wants, for example + when the output buffer is full (avail_out == 0), or after each call of + inflate(). If inflate returns Z_OK and with zero avail_out, it must be + called again after making room in the output buffer because there might be + more output pending. + + The flush parameter of inflate() can be Z_NO_FLUSH, Z_SYNC_FLUSH, Z_FINISH, + Z_BLOCK, or Z_TREES. Z_SYNC_FLUSH requests that inflate() flush as much + output as possible to the output buffer. Z_BLOCK requests that inflate() + stop if and when it gets to the next deflate block boundary. When decoding + the zlib or gzip format, this will cause inflate() to return immediately + after the header and before the first block. When doing a raw inflate, + inflate() will go ahead and process the first block, and will return when it + gets to the end of that block, or when it runs out of data. + + The Z_BLOCK option assists in appending to or combining deflate streams. + To assist in this, on return inflate() always sets strm->data_type to the + number of unused bits in the last byte taken from strm->next_in, plus 64 if + inflate() is currently decoding the last block in the deflate stream, plus + 128 if inflate() returned immediately after decoding an end-of-block code or + decoding the complete header up to just before the first byte of the deflate + stream. The end-of-block will not be indicated until all of the uncompressed + data from that block has been written to strm->next_out. The number of + unused bits may in general be greater than seven, except when bit 7 of + data_type is set, in which case the number of unused bits will be less than + eight. data_type is set as noted here every time inflate() returns for all + flush options, and so can be used to determine the amount of currently + consumed input in bits. + + The Z_TREES option behaves as Z_BLOCK does, but it also returns when the + end of each deflate block header is reached, before any actual data in that + block is decoded. This allows the caller to determine the length of the + deflate block header for later use in random access within a deflate block. + 256 is added to the value of strm->data_type when inflate() returns + immediately after reaching the end of the deflate block header. + + inflate() should normally be called until it returns Z_STREAM_END or an + error. However if all decompression is to be performed in a single step (a + single call of inflate), the parameter flush should be set to Z_FINISH. In + this case all pending input is processed and all pending output is flushed; + avail_out must be large enough to hold all of the uncompressed data for the + operation to complete. (The size of the uncompressed data may have been + saved by the compressor for this purpose.) The use of Z_FINISH is not + required to perform an inflation in one step. However it may be used to + inform inflate that a faster approach can be used for the single inflate() + call. Z_FINISH also informs inflate to not maintain a sliding window if the + stream completes, which reduces inflate's memory footprint. If the stream + does not complete, either because not all of the stream is provided or not + enough output space is provided, then a sliding window will be allocated and + inflate() can be called again to continue the operation as if Z_NO_FLUSH had + been used. + + In this implementation, inflate() always flushes as much output as + possible to the output buffer, and always uses the faster approach on the + first call. So the effects of the flush parameter in this implementation are + on the return value of inflate() as noted below, when inflate() returns early + when Z_BLOCK or Z_TREES is used, and when inflate() avoids the allocation of + memory for a sliding window when Z_FINISH is used. + + If a preset dictionary is needed after this call (see inflateSetDictionary + below), inflate sets strm->adler to the Adler-32 checksum of the dictionary + chosen by the compressor and returns Z_NEED_DICT; otherwise it sets + strm->adler to the Adler-32 checksum of all output produced so far (that is, + total_out bytes) and returns Z_OK, Z_STREAM_END or an error code as described + below. At the end of the stream, inflate() checks that its computed Adler-32 + checksum is equal to that saved by the compressor and returns Z_STREAM_END + only if the checksum is correct. + + inflate() can decompress and check either zlib-wrapped or gzip-wrapped + deflate data. The header type is detected automatically, if requested when + initializing with inflateInit2(). Any information contained in the gzip + header is not retained unless inflateGetHeader() is used. When processing + gzip-wrapped deflate data, strm->adler32 is set to the CRC-32 of the output + produced so far. The CRC-32 is checked against the gzip trailer, as is the + uncompressed length, modulo 2^32. + + inflate() returns Z_OK if some progress has been made (more input processed + or more output produced), Z_STREAM_END if the end of the compressed data has + been reached and all uncompressed output has been produced, Z_NEED_DICT if a + preset dictionary is needed at this point, Z_DATA_ERROR if the input data was + corrupted (input stream not conforming to the zlib format or incorrect check + value, in which case strm->msg points to a string with a more specific + error), Z_STREAM_ERROR if the stream structure was inconsistent (for example + next_in or next_out was Z_NULL, or the state was inadvertently written over + by the application), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR + if no progress was possible or if there was not enough room in the output + buffer when Z_FINISH is used. Note that Z_BUF_ERROR is not fatal, and + inflate() can be called again with more input and more output space to + continue decompressing. If Z_DATA_ERROR is returned, the application may + then call inflateSync() to look for a good compression block if a partial + recovery of the data is to be attempted. +*/ + + +ZEXTERN int ZEXPORT inflateEnd(z_streamp strm); +/* + All dynamically allocated data structures for this stream are freed. + This function discards any unprocessed input and does not flush any pending + output. + + inflateEnd returns Z_OK if success, or Z_STREAM_ERROR if the stream state + was inconsistent. +*/ + + + /* Advanced functions */ + +/* + The following functions are needed only in some special applications. +*/ + +/* +ZEXTERN int ZEXPORT deflateInit2(z_streamp strm, + int level, + int method, + int windowBits, + int memLevel, + int strategy); + + This is another version of deflateInit with more compression options. The + fields zalloc, zfree and opaque must be initialized before by the caller. + + The method parameter is the compression method. It must be Z_DEFLATED in + this version of the library. + + The windowBits parameter is the base two logarithm of the window size + (the size of the history buffer). It should be in the range 8..15 for this + version of the library. Larger values of this parameter result in better + compression at the expense of memory usage. The default value is 15 if + deflateInit is used instead. + + For the current implementation of deflate(), a windowBits value of 8 (a + window size of 256 bytes) is not supported. As a result, a request for 8 + will result in 9 (a 512-byte window). In that case, providing 8 to + inflateInit2() will result in an error when the zlib header with 9 is + checked against the initialization of inflate(). The remedy is to not use 8 + with deflateInit2() with this initialization, or at least in that case use 9 + with inflateInit2(). + + windowBits can also be -8..-15 for raw deflate. In this case, -windowBits + determines the window size. deflate() will then generate raw deflate data + with no zlib header or trailer, and will not compute a check value. + + windowBits can also be greater than 15 for optional gzip encoding. Add + 16 to windowBits to write a simple gzip header and trailer around the + compressed data instead of a zlib wrapper. The gzip header will have no + file name, no extra data, no comment, no modification time (set to zero), no + header crc, and the operating system will be set to the appropriate value, + if the operating system was determined at compile time. If a gzip stream is + being written, strm->adler is a CRC-32 instead of an Adler-32. + + For raw deflate or gzip encoding, a request for a 256-byte window is + rejected as invalid, since only the zlib header provides a means of + transmitting the window size to the decompressor. + + The memLevel parameter specifies how much memory should be allocated + for the internal compression state. memLevel=1 uses minimum memory but is + slow and reduces compression ratio; memLevel=9 uses maximum memory for + optimal speed. The default value is 8. See zconf.h for total memory usage + as a function of windowBits and memLevel. + + The strategy parameter is used to tune the compression algorithm. Use the + value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a + filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no + string match), or Z_RLE to limit match distances to one (run-length + encoding). Filtered data consists mostly of small values with a somewhat + random distribution. In this case, the compression algorithm is tuned to + compress them better. The effect of Z_FILTERED is to force more Huffman + coding and less string matching; it is somewhat intermediate between + Z_DEFAULT_STRATEGY and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as + fast as Z_HUFFMAN_ONLY, but give better compression for PNG image data. The + strategy parameter only affects the compression ratio but not the + correctness of the compressed output even if it is not set appropriately. + Z_FIXED prevents the use of dynamic Huffman codes, allowing for a simpler + decoder for special applications. + + deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_STREAM_ERROR if any parameter is invalid (such as an invalid + method), or Z_VERSION_ERROR if the zlib library version (zlib_version) is + incompatible with the version assumed by the caller (ZLIB_VERSION). msg is + set to null if there is no error message. deflateInit2 does not perform any + compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the compression dictionary from the given byte sequence + without producing any compressed output. When using the zlib format, this + function must be called immediately after deflateInit, deflateInit2 or + deflateReset, and before any call of deflate. When doing raw deflate, this + function must be called either before any call of deflate, or immediately + after the completion of a deflate block, i.e. after all input has been + consumed and all output has been delivered when using any of the flush + options Z_BLOCK, Z_PARTIAL_FLUSH, Z_SYNC_FLUSH, or Z_FULL_FLUSH. The + compressor and decompressor must use exactly the same dictionary (see + inflateSetDictionary). + + The dictionary should consist of strings (byte sequences) that are likely + to be encountered later in the data to be compressed, with the most commonly + used strings preferably put towards the end of the dictionary. Using a + dictionary is most useful when the data to be compressed is short and can be + predicted with good accuracy; the data can then be compressed better than + with the default empty dictionary. + + Depending on the size of the compression data structures selected by + deflateInit or deflateInit2, a part of the dictionary may in effect be + discarded, for example if the dictionary is larger than the window size + provided in deflateInit or deflateInit2. Thus the strings most likely to be + useful should be put at the end of the dictionary, not at the front. In + addition, the current implementation of deflate will use at most the window + size minus 262 bytes of the provided dictionary. + + Upon return of this function, strm->adler is set to the Adler-32 value + of the dictionary; the decompressor may later use this value to determine + which dictionary has been used by the compressor. (The Adler-32 value + applies to the whole dictionary even if only a subset of the dictionary is + actually used by the compressor.) If a raw deflate was requested, then the + Adler-32 value is not computed and strm->adler is not set. + + deflateSetDictionary returns Z_OK if success, or Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent (for example if deflate has already been called for this stream + or if not at a block boundary for raw deflate). deflateSetDictionary does + not perform any compression: this will be done by deflate(). +*/ + +ZEXTERN int ZEXPORT deflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by deflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If deflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + deflateGetDictionary() may return a length less than the window size, even + when more than the window size in input has been provided. It may return up + to 258 bytes less in that case, due to how zlib's implementation of deflate + manages the sliding window and lookahead for matches, where matches can be + up to 258 bytes long. If the application needs the last window-size bytes of + input, then that would need to be saved by the application outside of zlib. + + deflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when several compression strategies will be + tried, for example when there are several ways of pre-processing the input + data with a filter. The streams that will be discarded should then be freed + by calling deflateEnd. Note that deflateCopy duplicates the internal + compression state which can be quite large, so this strategy is slow and can + consume lots of memory. + + deflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT deflateReset(z_streamp strm); +/* + This function is equivalent to deflateEnd followed by deflateInit, but + does not free and reallocate the internal compression state. The stream + will leave the compression level and any other attributes that may have been + set unchanged. total_in, total_out, adler, and msg are initialized. + + deflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT deflateParams(z_streamp strm, + int level, + int strategy); +/* + Dynamically update the compression level and compression strategy. The + interpretation of level and strategy is as in deflateInit2(). This can be + used to switch between compression and straight copy of the input data, or + to switch to a different kind of input data requiring a different strategy. + If the compression approach (which is a function of the level) or the + strategy is changed, and if there have been any deflate() calls since the + state was initialized or reset, then the input available so far is + compressed with the old level and strategy using deflate(strm, Z_BLOCK). + There are three approaches for the compression levels 0, 1..3, and 4..9 + respectively. The new level and strategy will take effect at the next call + of deflate(). + + If a deflate(strm, Z_BLOCK) is performed by deflateParams(), and it does + not have enough output space to complete, then the parameter change will not + take effect. In this case, deflateParams() can be called again with the + same parameters and more output space to try again. + + In order to assure a change in the parameters on the first try, the + deflate stream should be flushed using deflate() with Z_BLOCK or other flush + request until strm.avail_out is not zero, before calling deflateParams(). + Then no more input data should be provided before the deflateParams() call. + If this is done, the old level and strategy will be applied to the data + compressed before deflateParams(), and the new level and strategy will be + applied to the data compressed after deflateParams(). + + deflateParams returns Z_OK on success, Z_STREAM_ERROR if the source stream + state was inconsistent or if a parameter was invalid, or Z_BUF_ERROR if + there was not enough output space to complete the compression of the + available input data before a change in the strategy or approach. Note that + in the case of a Z_BUF_ERROR, the parameters are not changed. A return + value of Z_BUF_ERROR is not fatal, in which case deflateParams() can be + retried with more output space. +*/ + +ZEXTERN int ZEXPORT deflateTune(z_streamp strm, + int good_length, + int max_lazy, + int nice_length, + int max_chain); +/* + Fine tune deflate's internal compression parameters. This should only be + used by someone who understands the algorithm used by zlib's deflate for + searching for the best matching string, and even then only by the most + fanatic optimizer trying to squeeze out the last compressed bit for their + specific input data. Read the deflate.c source code for the meaning of the + max_lazy, good_length, nice_length, and max_chain parameters. + + deflateTune() can be called after deflateInit() or deflateInit2(), and + returns Z_OK on success, or Z_STREAM_ERROR for an invalid deflate stream. + */ + +ZEXTERN uLong ZEXPORT deflateBound(z_streamp strm, + uLong sourceLen); +/* + deflateBound() returns an upper bound on the compressed size after + deflation of sourceLen bytes. It must be called after deflateInit() or + deflateInit2(), and after deflateSetHeader(), if used. This would be used + to allocate an output buffer for deflation in a single pass, and so would be + called before deflate(). If that first deflate() call is provided the + sourceLen input bytes, an output buffer allocated to the size returned by + deflateBound(), and the flush value Z_FINISH, then deflate() is guaranteed + to return Z_STREAM_END. Note that it is possible for the compressed size to + be larger than the value returned by deflateBound() if flush options other + than Z_FINISH or Z_NO_FLUSH are used. +*/ + +ZEXTERN int ZEXPORT deflatePending(z_streamp strm, + unsigned *pending, + int *bits); +/* + deflatePending() returns the number of bytes and bits of output that have + been generated, but not yet provided in the available output. The bytes not + provided would be due to the available output space having being consumed. + The number of bits of output not provided are between 0 and 7, where they + await more bits to join them in order to fill out a full byte. If pending + or bits are Z_NULL, then those values are not set. + + deflatePending returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. + */ + +ZEXTERN int ZEXPORT deflatePrime(z_streamp strm, + int bits, + int value); +/* + deflatePrime() inserts bits in the deflate output stream. The intent + is that this function is used to start off the deflate output with the bits + leftover from a previous deflate stream when appending to it. As such, this + function can only be used for raw deflate, and must be used before the first + deflate() call after a deflateInit2() or deflateReset(). bits must be less + than or equal to 16, and that many of the least significant bits of value + will be inserted in the output. + + deflatePrime returns Z_OK if success, Z_BUF_ERROR if there was not enough + room in the internal buffer to insert the bits, or Z_STREAM_ERROR if the + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT deflateSetHeader(z_streamp strm, + gz_headerp head); +/* + deflateSetHeader() provides gzip header information for when a gzip + stream is requested by deflateInit2(). deflateSetHeader() may be called + after deflateInit2() or deflateReset() and before the first call of + deflate(). The text, time, os, extra field, name, and comment information + in the provided gz_header structure are written to the gzip header (xflag is + ignored -- the extra flags are set according to the compression level). The + caller must assure that, if not Z_NULL, name and comment are terminated with + a zero byte, and that if extra is not Z_NULL, that extra_len bytes are + available there. If hcrc is true, a gzip header crc is included. Note that + the current versions of the command-line version of gzip (up through version + 1.3.x) do not support header crc's, and will report that it is a "multi-part + gzip file" and give up. + + If deflateSetHeader is not used, the default gzip header has text false, + the time set to zero, and os set to the current operating system, with no + extra, name, or comment fields. The gzip header is returned to the default + state by deflateReset(). + + deflateSetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateInit2(z_streamp strm, + int windowBits); + + This is another version of inflateInit with an extra parameter. The + fields next_in, avail_in, zalloc, zfree and opaque must be initialized + before by the caller. + + The windowBits parameter is the base two logarithm of the maximum window + size (the size of the history buffer). It should be in the range 8..15 for + this version of the library. The default value is 15 if inflateInit is used + instead. windowBits must be greater than or equal to the windowBits value + provided to deflateInit2() while compressing, or it must be equal to 15 if + deflateInit2() was not used. If a compressed stream with a larger window + size is given as input, inflate() will return with the error code + Z_DATA_ERROR instead of trying to allocate a larger window. + + windowBits can also be zero to request that inflate use the window size in + the zlib header of the compressed stream. + + windowBits can also be -8..-15 for raw inflate. In this case, -windowBits + determines the window size. inflate() will then process raw deflate data, + not looking for a zlib or gzip header, not generating a check value, and not + looking for any check values for comparison at the end of the stream. This + is for use with other formats that use the deflate compressed data format + such as zip. Those formats provide their own check values. If a custom + format is developed using the raw deflate format for compressed data, it is + recommended that a check value such as an Adler-32 or a CRC-32 be applied to + the uncompressed data as is done in the zlib, gzip, and zip formats. For + most applications, the zlib format should be used as is. Note that comments + above on the use in deflateInit2() applies to the magnitude of windowBits. + + windowBits can also be greater than 15 for optional gzip decoding. Add + 32 to windowBits to enable zlib and gzip decoding with automatic header + detection, or add 16 to decode only the gzip format (the zlib format will + return a Z_DATA_ERROR). If a gzip stream is being decoded, strm->adler is a + CRC-32 instead of an Adler-32. Unlike the gunzip utility and gzread() (see + below), inflate() will *not* automatically decode concatenated gzip members. + inflate() will return Z_STREAM_END at the end of the gzip member. The state + would need to be reset to continue decoding a subsequent gzip member. This + *must* be done if there is more data after a gzip member, in order for the + decompression to be compliant with the gzip standard (RFC 1952). + + inflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_VERSION_ERROR if the zlib library version is incompatible with the + version assumed by the caller, or Z_STREAM_ERROR if the parameters are + invalid, such as a null pointer to the structure. msg is set to null if + there is no error message. inflateInit2 does not perform any decompression + apart from possibly reading the zlib header if present: actual decompression + will be done by inflate(). (So next_in and avail_in may be modified, but + next_out and avail_out are unused and unchanged.) The current implementation + of inflateInit2() does not process any header information -- that is + deferred until inflate() is called. +*/ + +ZEXTERN int ZEXPORT inflateSetDictionary(z_streamp strm, + const Bytef *dictionary, + uInt dictLength); +/* + Initializes the decompression dictionary from the given uncompressed byte + sequence. This function must be called immediately after a call of inflate, + if that call returned Z_NEED_DICT. The dictionary chosen by the compressor + can be determined from the Adler-32 value returned by that call of inflate. + The compressor and decompressor must use exactly the same dictionary (see + deflateSetDictionary). For raw inflate, this function can be called at any + time to set the dictionary. If the provided dictionary is smaller than the + window and there is already data in the window, then the provided dictionary + will amend what's there. The application must insure that the dictionary + that was used for compression is provided. + + inflateSetDictionary returns Z_OK if success, Z_STREAM_ERROR if a + parameter is invalid (e.g. dictionary being Z_NULL) or the stream state is + inconsistent, Z_DATA_ERROR if the given dictionary doesn't match the + expected one (incorrect Adler-32 value). inflateSetDictionary does not + perform any decompression: this will be done by subsequent calls of + inflate(). +*/ + +ZEXTERN int ZEXPORT inflateGetDictionary(z_streamp strm, + Bytef *dictionary, + uInt *dictLength); +/* + Returns the sliding dictionary being maintained by inflate. dictLength is + set to the number of bytes in the dictionary, and that many bytes are copied + to dictionary. dictionary must have enough space, where 32768 bytes is + always enough. If inflateGetDictionary() is called with dictionary equal to + Z_NULL, then only the dictionary length is returned, and nothing is copied. + Similarly, if dictLength is Z_NULL, then it is not set. + + inflateGetDictionary returns Z_OK on success, or Z_STREAM_ERROR if the + stream state is inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateSync(z_streamp strm); +/* + Skips invalid compressed data until a possible full flush point (see above + for the description of deflate with Z_FULL_FLUSH) can be found, or until all + available input is skipped. No output is provided. + + inflateSync searches for a 00 00 FF FF pattern in the compressed data. + All full flush points have this pattern, but not all occurrences of this + pattern are full flush points. + + inflateSync returns Z_OK if a possible full flush point has been found, + Z_BUF_ERROR if no more input was provided, Z_DATA_ERROR if no flush point + has been found, or Z_STREAM_ERROR if the stream structure was inconsistent. + In the success case, the application may save the current value of total_in + which indicates where valid compressed data was found. In the error case, + the application may repeatedly call inflateSync, providing more input each + time, until success or end of the input data. +*/ + +ZEXTERN int ZEXPORT inflateCopy(z_streamp dest, + z_streamp source); +/* + Sets the destination stream as a complete copy of the source stream. + + This function can be useful when randomly accessing a large stream. The + first pass through the stream can periodically record the inflate state, + allowing restarting inflate at those points when randomly accessing the + stream. + + inflateCopy returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_STREAM_ERROR if the source stream state was inconsistent + (such as zalloc being Z_NULL). msg is left unchanged in both source and + destination. +*/ + +ZEXTERN int ZEXPORT inflateReset(z_streamp strm); +/* + This function is equivalent to inflateEnd followed by inflateInit, + but does not free and reallocate the internal decompression state. The + stream will keep attributes that may have been set by inflateInit2. + total_in, total_out, adler, and msg are initialized. + + inflateReset returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL). +*/ + +ZEXTERN int ZEXPORT inflateReset2(z_streamp strm, + int windowBits); +/* + This function is the same as inflateReset, but it also permits changing + the wrap and window size requests. The windowBits parameter is interpreted + the same as it is for inflateInit2. If the window size is changed, then the + memory allocated for the window is freed, and the window will be reallocated + by inflate() if needed. + + inflateReset2 returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent (such as zalloc or state being Z_NULL), or if + the windowBits parameter is invalid. +*/ + +ZEXTERN int ZEXPORT inflatePrime(z_streamp strm, + int bits, + int value); +/* + This function inserts bits in the inflate input stream. The intent is + that this function is used to start inflating at a bit position in the + middle of a byte. The provided bits will be used before any bytes are used + from next_in. This function should only be used with raw inflate, and + should be used before the first inflate() call after inflateInit2() or + inflateReset(). bits must be less than or equal to 16, and that many of the + least significant bits of value will be inserted in the input. + + If bits is negative, then the input stream bit buffer is emptied. Then + inflatePrime() can be called again to put bits in the buffer. This is used + to clear out bits leftover after feeding inflate a block description prior + to feeding inflate codes. + + inflatePrime returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +ZEXTERN long ZEXPORT inflateMark(z_streamp strm); +/* + This function returns two values, one in the lower 16 bits of the return + value, and the other in the remaining upper bits, obtained by shifting the + return value down 16 bits. If the upper value is -1 and the lower value is + zero, then inflate() is currently decoding information outside of a block. + If the upper value is -1 and the lower value is non-zero, then inflate is in + the middle of a stored block, with the lower value equaling the number of + bytes from the input remaining to copy. If the upper value is not -1, then + it is the number of bits back from the current bit position in the input of + the code (literal or length/distance pair) currently being processed. In + that case the lower value is the number of bytes already emitted for that + code. + + A code is being processed if inflate is waiting for more input to complete + decoding of the code, or if it has completed decoding but is waiting for + more output space to write the literal or match data. + + inflateMark() is used to mark locations in the input data for random + access, which may be at bit positions, and to note those cases where the + output of a code may span boundaries of random access blocks. The current + location in the input stream can be determined from avail_in and data_type + as noted in the description for the Z_BLOCK flush parameter for inflate. + + inflateMark returns the value noted above, or -65536 if the provided + source stream state was inconsistent. +*/ + +ZEXTERN int ZEXPORT inflateGetHeader(z_streamp strm, + gz_headerp head); +/* + inflateGetHeader() requests that gzip header information be stored in the + provided gz_header structure. inflateGetHeader() may be called after + inflateInit2() or inflateReset(), and before the first call of inflate(). + As inflate() processes the gzip stream, head->done is zero until the header + is completed, at which time head->done is set to one. If a zlib stream is + being decoded, then head->done is set to -1 to indicate that there will be + no gzip header information forthcoming. Note that Z_BLOCK or Z_TREES can be + used to force inflate() to return immediately after header processing is + complete and before any actual data is decompressed. + + The text, time, xflags, and os fields are filled in with the gzip header + contents. hcrc is set to true if there is a header CRC. (The header CRC + was valid if done is set to one.) If extra is not Z_NULL, then extra_max + contains the maximum number of bytes to write to extra. Once done is true, + extra_len contains the actual extra field length, and extra contains the + extra field, or that field truncated if extra_max is less than extra_len. + If name is not Z_NULL, then up to name_max characters are written there, + terminated with a zero unless the length is greater than name_max. If + comment is not Z_NULL, then up to comm_max characters are written there, + terminated with a zero unless the length is greater than comm_max. When any + of extra, name, or comment are not Z_NULL and the respective field is not + present in the header, then that field is set to Z_NULL to signal its + absence. This allows the use of deflateSetHeader() with the returned + structure to duplicate the header. However if those fields are set to + allocated memory, then the application will need to save those pointers + elsewhere so that they can be eventually freed. + + If inflateGetHeader is not used, then the header information is simply + discarded. The header is always checked for validity, including the header + CRC if present. inflateReset() will reset the process to discard the header + information. The application would need to call inflateGetHeader() again to + retrieve the header from the next gzip stream. + + inflateGetHeader returns Z_OK if success, or Z_STREAM_ERROR if the source + stream state was inconsistent. +*/ + +/* +ZEXTERN int ZEXPORT inflateBackInit(z_streamp strm, int windowBits, + unsigned char FAR *window); + + Initialize the internal stream state for decompression using inflateBack() + calls. The fields zalloc, zfree and opaque in strm must be initialized + before the call. If zalloc and zfree are Z_NULL, then the default library- + derived memory allocation routines are used. windowBits is the base two + logarithm of the window size, in the range 8..15. window is a caller + supplied buffer of that size. Except for special applications where it is + assured that deflate was used with small window sizes, windowBits must be 15 + and a 32K byte window must be supplied to be able to decompress general + deflate streams. + + See inflateBack() for the usage of these routines. + + inflateBackInit will return Z_OK on success, Z_STREAM_ERROR if any of + the parameters are invalid, Z_MEM_ERROR if the internal state could not be + allocated, or Z_VERSION_ERROR if the version of the library does not match + the version of the header file. +*/ + +typedef unsigned (*in_func)(void FAR *, + z_const unsigned char FAR * FAR *); +typedef int (*out_func)(void FAR *, unsigned char FAR *, unsigned); + +ZEXTERN int ZEXPORT inflateBack(z_streamp strm, + in_func in, void FAR *in_desc, + out_func out, void FAR *out_desc); +/* + inflateBack() does a raw inflate with a single call using a call-back + interface for input and output. This is potentially more efficient than + inflate() for file i/o applications, in that it avoids copying between the + output and the sliding window by simply making the window itself the output + buffer. inflate() can be faster on modern CPUs when used with large + buffers. inflateBack() trusts the application to not change the output + buffer passed by the output function, at least until inflateBack() returns. + + inflateBackInit() must be called first to allocate the internal state + and to initialize the state with the user-provided window buffer. + inflateBack() may then be used multiple times to inflate a complete, raw + deflate stream with each call. inflateBackEnd() is then called to free the + allocated state. + + A raw deflate stream is one with no zlib or gzip header or trailer. + This routine would normally be used in a utility that reads zip or gzip + files and writes out uncompressed files. The utility would decode the + header and process the trailer on its own, hence this routine expects only + the raw deflate stream to decompress. This is different from the default + behavior of inflate(), which expects a zlib header and trailer around the + deflate stream. + + inflateBack() uses two subroutines supplied by the caller that are then + called by inflateBack() for input and output. inflateBack() calls those + routines until it reads a complete deflate stream and writes out all of the + uncompressed data, or until it encounters an error. The function's + parameters and return types are defined above in the in_func and out_func + typedefs. inflateBack() will call in(in_desc, &buf) which should return the + number of bytes of provided input, and a pointer to that input in buf. If + there is no input available, in() must return zero -- buf is ignored in that + case -- and inflateBack() will return a buffer error. inflateBack() will + call out(out_desc, buf, len) to write the uncompressed data buf[0..len-1]. + out() should return zero on success, or non-zero on failure. If out() + returns non-zero, inflateBack() will return with an error. Neither in() nor + out() are permitted to change the contents of the window provided to + inflateBackInit(), which is also the buffer that out() uses to write from. + The length written by out() will be at most the window size. Any non-zero + amount of input may be provided by in(). + + For convenience, inflateBack() can be provided input on the first call by + setting strm->next_in and strm->avail_in. If that input is exhausted, then + in() will be called. Therefore strm->next_in must be initialized before + calling inflateBack(). If strm->next_in is Z_NULL, then in() will be called + immediately for input. If strm->next_in is not Z_NULL, then strm->avail_in + must also be initialized, and then if strm->avail_in is not zero, input will + initially be taken from strm->next_in[0 .. strm->avail_in - 1]. + + The in_desc and out_desc parameters of inflateBack() is passed as the + first parameter of in() and out() respectively when they are called. These + descriptors can be optionally used to pass any information that the caller- + supplied in() and out() functions need to do their job. + + On return, inflateBack() will set strm->next_in and strm->avail_in to + pass back any unused input that was provided by the last in() call. The + return values of inflateBack() can be Z_STREAM_END on success, Z_BUF_ERROR + if in() or out() returned an error, Z_DATA_ERROR if there was a format error + in the deflate stream (in which case strm->msg is set to indicate the nature + of the error), or Z_STREAM_ERROR if the stream was not properly initialized. + In the case of Z_BUF_ERROR, an input or output error can be distinguished + using strm->next_in which will be Z_NULL only if in() returned an error. If + strm->next_in is not Z_NULL, then the Z_BUF_ERROR was due to out() returning + non-zero. (in() will always be called before out(), so strm->next_in is + assured to be defined if out() returns non-zero.) Note that inflateBack() + cannot return Z_OK. +*/ + +ZEXTERN int ZEXPORT inflateBackEnd(z_streamp strm); +/* + All memory allocated by inflateBackInit() is freed. + + inflateBackEnd() returns Z_OK on success, or Z_STREAM_ERROR if the stream + state was inconsistent. +*/ + +ZEXTERN uLong ZEXPORT zlibCompileFlags(void); +/* Return flags indicating compile-time options. + + Type sizes, two bits each, 00 = 16 bits, 01 = 32, 10 = 64, 11 = other: + 1.0: size of uInt + 3.2: size of uLong + 5.4: size of voidpf (pointer) + 7.6: size of z_off_t + + Compiler, assembler, and debug options: + 8: ZLIB_DEBUG + 9: ASMV or ASMINF -- use ASM code + 10: ZLIB_WINAPI -- exported functions use the WINAPI calling convention + 11: 0 (reserved) + + One-time table building (smaller code, but not thread-safe if true): + 12: BUILDFIXED -- build static block decoding tables when needed + 13: DYNAMIC_CRC_TABLE -- build CRC calculation tables when needed + 14,15: 0 (reserved) + + Library content (indicates missing functionality): + 16: NO_GZCOMPRESS -- gz* functions cannot compress (to avoid linking + deflate code when not needed) + 17: NO_GZIP -- deflate can't write gzip streams, and inflate can't detect + and decode gzip streams (to avoid linking crc code) + 18-19: 0 (reserved) + + Operation variations (changes in library functionality): + 20: PKZIP_BUG_WORKAROUND -- slightly more permissive inflate + 21: FASTEST -- deflate algorithm with only one, lowest compression level + 22,23: 0 (reserved) + + The sprintf variant used by gzprintf (zero is best): + 24: 0 = vs*, 1 = s* -- 1 means limited to 20 arguments after the format + 25: 0 = *nprintf, 1 = *printf -- 1 means gzprintf() not secure! + 26: 0 = returns value, 1 = void -- 1 means inferred string length returned + + Remainder: + 27-31: 0 (reserved) + */ + +#ifndef Z_SOLO + + /* utility functions */ + +/* + The following utility functions are implemented on top of the basic + stream-oriented functions. To simplify the interface, some default options + are assumed (compression level and memory usage, standard memory allocation + functions). The source code of these utility functions can be modified if + you need special options. +*/ + +ZEXTERN int ZEXPORT compress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Compresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. compress() is equivalent to compress2() with a level + parameter of Z_DEFAULT_COMPRESSION. + + compress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer. +*/ + +ZEXTERN int ZEXPORT compress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen, + int level); +/* + Compresses the source buffer into the destination buffer. The level + parameter has the same meaning as in deflateInit. sourceLen is the byte + length of the source buffer. Upon entry, destLen is the total size of the + destination buffer, which must be at least the value returned by + compressBound(sourceLen). Upon exit, destLen is the actual size of the + compressed data. + + compress2 returns Z_OK if success, Z_MEM_ERROR if there was not enough + memory, Z_BUF_ERROR if there was not enough room in the output buffer, + Z_STREAM_ERROR if the level parameter is invalid. +*/ + +ZEXTERN uLong ZEXPORT compressBound(uLong sourceLen); +/* + compressBound() returns an upper bound on the compressed size after + compress() or compress2() on sourceLen bytes. It would be used before a + compress() or compress2() call to allocate the destination buffer. +*/ + +ZEXTERN int ZEXPORT uncompress(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong sourceLen); +/* + Decompresses the source buffer into the destination buffer. sourceLen is + the byte length of the source buffer. Upon entry, destLen is the total size + of the destination buffer, which must be large enough to hold the entire + uncompressed data. (The size of the uncompressed data must have been saved + previously by the compressor and transmitted to the decompressor by some + mechanism outside the scope of this compression library.) Upon exit, destLen + is the actual size of the uncompressed data. + + uncompress returns Z_OK if success, Z_MEM_ERROR if there was not + enough memory, Z_BUF_ERROR if there was not enough room in the output + buffer, or Z_DATA_ERROR if the input data was corrupted or incomplete. In + the case where there is not enough room, uncompress() will fill the output + buffer with the uncompressed data up to that point. +*/ + +ZEXTERN int ZEXPORT uncompress2(Bytef *dest, uLongf *destLen, + const Bytef *source, uLong *sourceLen); +/* + Same as uncompress, except that sourceLen is a pointer, where the + length of the source is *sourceLen. On return, *sourceLen is the number of + source bytes consumed. +*/ + + /* gzip file access functions */ + +/* + This library supports reading and writing files in gzip (.gz) format with + an interface similar to that of stdio, using the functions that start with + "gz". The gzip format is different from the zlib format. gzip is a gzip + wrapper, documented in RFC 1952, wrapped around a deflate stream. +*/ + +typedef struct gzFile_s *gzFile; /* semi-opaque gzip file descriptor */ + +/* +ZEXTERN gzFile ZEXPORT gzopen(const char *path, const char *mode); + + Open the gzip (.gz) file at path for reading and decompressing, or + compressing and writing. The mode parameter is as in fopen ("rb" or "wb") + but can also include a compression level ("wb9") or a strategy: 'f' for + filtered data as in "wb6f", 'h' for Huffman-only compression as in "wb1h", + 'R' for run-length encoding as in "wb1R", or 'F' for fixed code compression + as in "wb9F". (See the description of deflateInit2 for more information + about the strategy parameter.) 'T' will request transparent writing or + appending with no compression and not using the gzip format. + + "a" can be used instead of "w" to request that the gzip stream that will + be written be appended to the file. "+" will result in an error, since + reading and writing to the same gzip file is not supported. The addition of + "x" when writing will create the file exclusively, which fails if the file + already exists. On systems that support it, the addition of "e" when + reading or writing will set the flag to close the file on an execve() call. + + These functions, as well as gzip, will read and decode a sequence of gzip + streams in a file. The append function of gzopen() can be used to create + such a file. (Also see gzflush() for another way to do this.) When + appending, gzopen does not test whether the file begins with a gzip stream, + nor does it look for the end of the gzip streams to begin appending. gzopen + will simply append a gzip stream to the existing file. + + gzopen can be used to read a file which is not in gzip format; in this + case gzread will directly read from the file without decompression. When + reading, this will be detected automatically by looking for the magic two- + byte gzip header. + + gzopen returns NULL if the file could not be opened, if there was + insufficient memory to allocate the gzFile state, or if an invalid mode was + specified (an 'r', 'w', or 'a' was not provided, or '+' was provided). + errno can be checked to determine if the reason gzopen failed was that the + file could not be opened. +*/ + +ZEXTERN gzFile ZEXPORT gzdopen(int fd, const char *mode); +/* + Associate a gzFile with the file descriptor fd. File descriptors are + obtained from calls like open, dup, creat, pipe or fileno (if the file has + been previously opened with fopen). The mode parameter is as in gzopen. + + The next call of gzclose on the returned gzFile will also close the file + descriptor fd, just like fclose(fdopen(fd, mode)) closes the file descriptor + fd. If you want to keep fd open, use fd = dup(fd_keep); gz = gzdopen(fd, + mode);. The duplicated descriptor should be saved to avoid a leak, since + gzdopen does not close fd if it fails. If you are using fileno() to get the + file descriptor from a FILE *, then you will have to use dup() to avoid + double-close()ing the file descriptor. Both gzclose() and fclose() will + close the associated file descriptor, so they need to have different file + descriptors. + + gzdopen returns NULL if there was insufficient memory to allocate the + gzFile state, if an invalid mode was specified (an 'r', 'w', or 'a' was not + provided, or '+' was provided), or if fd is -1. The file descriptor is not + used until the next gz* read, write, seek, or close operation, so gzdopen + will not detect if fd is invalid (unless fd is -1). +*/ + +ZEXTERN int ZEXPORT gzbuffer(gzFile file, unsigned size); +/* + Set the internal buffer size used by this library's functions for file to + size. The default buffer size is 8192 bytes. This function must be called + after gzopen() or gzdopen(), and before any other calls that read or write + the file. The buffer memory allocation is always deferred to the first read + or write. Three times that size in buffer space is allocated. A larger + buffer size of, for example, 64K or 128K bytes will noticeably increase the + speed of decompression (reading). + + The new buffer size also affects the maximum length for gzprintf(). + + gzbuffer() returns 0 on success, or -1 on failure, such as being called + too late. +*/ + +ZEXTERN int ZEXPORT gzsetparams(gzFile file, int level, int strategy); +/* + Dynamically update the compression level and strategy for file. See the + description of deflateInit2 for the meaning of these parameters. Previously + provided data is flushed before applying the parameter changes. + + gzsetparams returns Z_OK if success, Z_STREAM_ERROR if the file was not + opened for writing, Z_ERRNO if there is an error writing the flushed data, + or Z_MEM_ERROR if there is a memory allocation error. +*/ + +ZEXTERN int ZEXPORT gzread(gzFile file, voidp buf, unsigned len); +/* + Read and decompress up to len uncompressed bytes from file into buf. If + the input file is not in gzip format, gzread copies the given number of + bytes into the buffer directly from the file. + + After reaching the end of a gzip stream in the input, gzread will continue + to read, looking for another gzip stream. Any number of gzip streams may be + concatenated in the input file, and will all be decompressed by gzread(). + If something other than a gzip stream is encountered after a gzip stream, + that remaining trailing garbage is ignored (and no error is returned). + + gzread can be used to read a gzip file that is being concurrently written. + Upon reaching the end of the input, gzread will return with the available + data. If the error code returned by gzerror is Z_OK or Z_BUF_ERROR, then + gzclearerr can be used to clear the end of file indicator in order to permit + gzread to be tried again. Z_OK indicates that a gzip stream was completed + on the last gzread. Z_BUF_ERROR indicates that the input file ended in the + middle of a gzip stream. Note that gzread does not return -1 in the event + of an incomplete gzip stream. This error is deferred until gzclose(), which + will return Z_BUF_ERROR if the last gzread ended in the middle of a gzip + stream. Alternatively, gzerror can be used before gzclose to detect this + case. + + gzread returns the number of uncompressed bytes actually read, less than + len for end of file, or -1 for error. If len is too large to fit in an int, + then nothing is read, -1 is returned, and the error state is set to + Z_STREAM_ERROR. +*/ + +ZEXTERN z_size_t ZEXPORT gzfread(voidp buf, z_size_t size, z_size_t nitems, + gzFile file); +/* + Read and decompress up to nitems items of size size from file into buf, + otherwise operating as gzread() does. This duplicates the interface of + stdio's fread(), with size_t request and return types. If the library + defines size_t, then z_size_t is identical to size_t. If not, then z_size_t + is an unsigned integer type that can contain a pointer. + + gzfread() returns the number of full items read of size size, or zero if + the end of the file was reached and a full item could not be read, or if + there was an error. gzerror() must be consulted if zero is returned in + order to determine if there was an error. If the multiplication of size and + nitems overflows, i.e. the product does not fit in a z_size_t, then nothing + is read, zero is returned, and the error state is set to Z_STREAM_ERROR. + + In the event that the end of file is reached and only a partial item is + available at the end, i.e. the remaining uncompressed data length is not a + multiple of size, then the final partial item is nevertheless read into buf + and the end-of-file flag is set. The length of the partial item read is not + provided, but could be inferred from the result of gztell(). This behavior + is the same as the behavior of fread() implementations in common libraries, + but it prevents the direct use of gzfread() to read a concurrently written + file, resetting and retrying on end-of-file, when size is not 1. +*/ + +ZEXTERN int ZEXPORT gzwrite(gzFile file, voidpc buf, unsigned len); +/* + Compress and write the len uncompressed bytes at buf to file. gzwrite + returns the number of uncompressed bytes written or 0 in case of error. +*/ + +ZEXTERN z_size_t ZEXPORT gzfwrite(voidpc buf, z_size_t size, + z_size_t nitems, gzFile file); +/* + Compress and write nitems items of size size from buf to file, duplicating + the interface of stdio's fwrite(), with size_t request and return types. If + the library defines size_t, then z_size_t is identical to size_t. If not, + then z_size_t is an unsigned integer type that can contain a pointer. + + gzfwrite() returns the number of full items written of size size, or zero + if there was an error. If the multiplication of size and nitems overflows, + i.e. the product does not fit in a z_size_t, then nothing is written, zero + is returned, and the error state is set to Z_STREAM_ERROR. +*/ + +ZEXTERN int ZEXPORTVA gzprintf(gzFile file, const char *format, ...); +/* + Convert, format, compress, and write the arguments (...) to file under + control of the string format, as in fprintf. gzprintf returns the number of + uncompressed bytes actually written, or a negative zlib error code in case + of error. The number of uncompressed bytes written is limited to 8191, or + one less than the buffer size given to gzbuffer(). The caller should assure + that this limit is not exceeded. If it is exceeded, then gzprintf() will + return an error (0) with nothing written. In this case, there may also be a + buffer overflow with unpredictable consequences, which is possible only if + zlib was compiled with the insecure functions sprintf() or vsprintf(), + because the secure snprintf() or vsnprintf() functions were not available. + This can be determined using zlibCompileFlags(). +*/ + +ZEXTERN int ZEXPORT gzputs(gzFile file, const char *s); +/* + Compress and write the given null-terminated string s to file, excluding + the terminating null character. + + gzputs returns the number of characters written, or -1 in case of error. +*/ + +ZEXTERN char * ZEXPORT gzgets(gzFile file, char *buf, int len); +/* + Read and decompress bytes from file into buf, until len-1 characters are + read, or until a newline character is read and transferred to buf, or an + end-of-file condition is encountered. If any characters are read or if len + is one, the string is terminated with a null character. If no characters + are read due to an end-of-file or len is less than one, then the buffer is + left untouched. + + gzgets returns buf which is a null-terminated string, or it returns NULL + for end-of-file or in case of error. If there was an error, the contents at + buf are indeterminate. +*/ + +ZEXTERN int ZEXPORT gzputc(gzFile file, int c); +/* + Compress and write c, converted to an unsigned char, into file. gzputc + returns the value that was written, or -1 in case of error. +*/ + +ZEXTERN int ZEXPORT gzgetc(gzFile file); +/* + Read and decompress one byte from file. gzgetc returns this byte or -1 + in case of end of file or error. This is implemented as a macro for speed. + As such, it does not do all of the checking the other functions do. I.e. + it does not check to see if file is NULL, nor whether the structure file + points to has been clobbered or not. +*/ + +ZEXTERN int ZEXPORT gzungetc(int c, gzFile file); +/* + Push c back onto the stream for file to be read as the first character on + the next read. At least one character of push-back is always allowed. + gzungetc() returns the character pushed, or -1 on failure. gzungetc() will + fail if c is -1, and may fail if a character has been pushed but not read + yet. If gzungetc is used immediately after gzopen or gzdopen, at least the + output buffer size of pushed characters is allowed. (See gzbuffer above.) + The pushed character will be discarded if the stream is repositioned with + gzseek() or gzrewind(). +*/ + +ZEXTERN int ZEXPORT gzflush(gzFile file, int flush); +/* + Flush all pending output to file. The parameter flush is as in the + deflate() function. The return value is the zlib error number (see function + gzerror below). gzflush is only permitted when writing. + + If the flush parameter is Z_FINISH, the remaining data is written and the + gzip stream is completed in the output. If gzwrite() is called again, a new + gzip stream will be started in the output. gzread() is able to read such + concatenated gzip streams. + + gzflush should be called only when strictly necessary because it will + degrade compression if called too often. +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzseek(gzFile file, + z_off_t offset, int whence); + + Set the starting position to offset relative to whence for the next gzread + or gzwrite on file. The offset represents a number of bytes in the + uncompressed data stream. The whence parameter is defined as in lseek(2); + the value SEEK_END is not supported. + + If the file is opened for reading, this function is emulated but can be + extremely slow. If the file is opened for writing, only forward seeks are + supported; gzseek then compresses a sequence of zeroes up to the new + starting position. + + gzseek returns the resulting offset location as measured in bytes from + the beginning of the uncompressed stream, or -1 in case of error, in + particular if the file is opened for writing and the new starting position + would be before the current position. +*/ + +ZEXTERN int ZEXPORT gzrewind(gzFile file); +/* + Rewind file. This function is supported only for reading. + + gzrewind(file) is equivalent to (int)gzseek(file, 0L, SEEK_SET). +*/ + +/* +ZEXTERN z_off_t ZEXPORT gztell(gzFile file); + + Return the starting position for the next gzread or gzwrite on file. + This position represents a number of bytes in the uncompressed data stream, + and is zero when starting, even if appending or reading a gzip stream from + the middle of a file using gzdopen(). + + gztell(file) is equivalent to gzseek(file, 0L, SEEK_CUR) +*/ + +/* +ZEXTERN z_off_t ZEXPORT gzoffset(gzFile file); + + Return the current compressed (actual) read or write offset of file. This + offset includes the count of bytes that precede the gzip stream, for example + when appending or when using gzdopen() for reading. When reading, the + offset does not include as yet unused buffered input. This information can + be used for a progress indicator. On error, gzoffset() returns -1. +*/ + +ZEXTERN int ZEXPORT gzeof(gzFile file); +/* + Return true (1) if the end-of-file indicator for file has been set while + reading, false (0) otherwise. Note that the end-of-file indicator is set + only if the read tried to go past the end of the input, but came up short. + Therefore, just like feof(), gzeof() may return false even if there is no + more data to read, in the event that the last read request was for the exact + number of bytes remaining in the input file. This will happen if the input + file size is an exact multiple of the buffer size. + + If gzeof() returns true, then the read functions will return no more data, + unless the end-of-file indicator is reset by gzclearerr() and the input file + has grown since the previous end of file was detected. +*/ + +ZEXTERN int ZEXPORT gzdirect(gzFile file); +/* + Return true (1) if file is being copied directly while reading, or false + (0) if file is a gzip stream being decompressed. + + If the input file is empty, gzdirect() will return true, since the input + does not contain a gzip stream. + + If gzdirect() is used immediately after gzopen() or gzdopen() it will + cause buffers to be allocated to allow reading the file to determine if it + is a gzip file. Therefore if gzbuffer() is used, it should be called before + gzdirect(). + + When writing, gzdirect() returns true (1) if transparent writing was + requested ("wT" for the gzopen() mode), or false (0) otherwise. (Note: + gzdirect() is not needed when writing. Transparent writing must be + explicitly requested, so the application already knows the answer. When + linking statically, using gzdirect() will include all of the zlib code for + gzip file reading and decompression, which may not be desired.) +*/ + +ZEXTERN int ZEXPORT gzclose(gzFile file); +/* + Flush all pending output for file, if necessary, close file and + deallocate the (de)compression state. Note that once file is closed, you + cannot call gzerror with file, since its structures have been deallocated. + gzclose must not be called more than once on the same file, just as free + must not be called more than once on the same allocation. + + gzclose will return Z_STREAM_ERROR if file is not valid, Z_ERRNO on a + file operation error, Z_MEM_ERROR if out of memory, Z_BUF_ERROR if the + last read ended in the middle of a gzip stream, or Z_OK on success. +*/ + +ZEXTERN int ZEXPORT gzclose_r(gzFile file); +ZEXTERN int ZEXPORT gzclose_w(gzFile file); +/* + Same as gzclose(), but gzclose_r() is only for use when reading, and + gzclose_w() is only for use when writing or appending. The advantage to + using these instead of gzclose() is that they avoid linking in zlib + compression or decompression code that is not used when only reading or only + writing respectively. If gzclose() is used, then both compression and + decompression code will be included the application when linking to a static + zlib library. +*/ + +ZEXTERN const char * ZEXPORT gzerror(gzFile file, int *errnum); +/* + Return the error message for the last error which occurred on file. + errnum is set to zlib error number. If an error occurred in the file system + and not in the compression library, errnum is set to Z_ERRNO and the + application may consult errno to get the exact error code. + + The application must not modify the returned string. Future calls to + this function may invalidate the previously returned string. If file is + closed, then the string previously returned by gzerror will no longer be + available. + + gzerror() should be used to distinguish errors from end-of-file for those + functions above that do not distinguish those cases in their return values. +*/ + +ZEXTERN void ZEXPORT gzclearerr(gzFile file); +/* + Clear the error and end-of-file flags for file. This is analogous to the + clearerr() function in stdio. This is useful for continuing to read a gzip + file that is being written concurrently. +*/ + +#endif /* !Z_SOLO */ + + /* checksum functions */ + +/* + These functions are not related to compression but are exported + anyway because they might be useful in applications using the compression + library. +*/ + +ZEXTERN uLong ZEXPORT adler32(uLong adler, const Bytef *buf, uInt len); +/* + Update a running Adler-32 checksum with the bytes buf[0..len-1] and + return the updated checksum. An Adler-32 value is in the range of a 32-bit + unsigned integer. If buf is Z_NULL, this function returns the required + initial value for the checksum. + + An Adler-32 checksum is almost as reliable as a CRC-32 but can be computed + much faster. + + Usage example: + + uLong adler = adler32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + adler = adler32(adler, buffer, length); + } + if (adler != original_adler) error(); +*/ + +ZEXTERN uLong ZEXPORT adler32_z(uLong adler, const Bytef *buf, + z_size_t len); +/* + Same as adler32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT adler32_combine(uLong adler1, uLong adler2, + z_off_t len2); + + Combine two Adler-32 checksums into one. For two sequences of bytes, seq1 + and seq2 with lengths len1 and len2, Adler-32 checksums were calculated for + each, adler1 and adler2. adler32_combine() returns the Adler-32 checksum of + seq1 and seq2 concatenated, requiring only adler1, adler2, and len2. Note + that the z_off_t type (like off_t) is a signed integer. If len2 is + negative, the result has no meaning or utility. +*/ + +ZEXTERN uLong ZEXPORT crc32(uLong crc, const Bytef *buf, uInt len); +/* + Update a running CRC-32 with the bytes buf[0..len-1] and return the + updated CRC-32. A CRC-32 value is in the range of a 32-bit unsigned integer. + If buf is Z_NULL, this function returns the required initial value for the + crc. Pre- and post-conditioning (one's complement) is performed within this + function so it shouldn't be done by the application. + + Usage example: + + uLong crc = crc32(0L, Z_NULL, 0); + + while (read_buffer(buffer, length) != EOF) { + crc = crc32(crc, buffer, length); + } + if (crc != original_crc) error(); +*/ + +ZEXTERN uLong ZEXPORT crc32_z(uLong crc, const Bytef *buf, + z_size_t len); +/* + Same as crc32(), but with a size_t length. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2); + + Combine two CRC-32 check values into one. For two sequences of bytes, + seq1 and seq2 with lengths len1 and len2, CRC-32 check values were + calculated for each, crc1 and crc2. crc32_combine() returns the CRC-32 + check value of seq1 and seq2 concatenated, requiring only crc1, crc2, and + len2. len2 must be non-negative. +*/ + +/* +ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t len2); + + Return the operator corresponding to length len2, to be used with + crc32_combine_op(). len2 must be non-negative. +*/ + +ZEXTERN uLong ZEXPORT crc32_combine_op(uLong crc1, uLong crc2, uLong op); +/* + Give the same result as crc32_combine(), using op in place of len2. op is + is generated from len2 by crc32_combine_gen(). This will be faster than + crc32_combine() if the generated op is used more than once. +*/ + + + /* various hacks, don't look :) */ + +/* deflateInit and inflateInit are macros to allow checking the zlib version + * and the compiler's view of z_stream: + */ +ZEXTERN int ZEXPORT deflateInit_(z_streamp strm, int level, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateInit_(z_streamp strm, + const char *version, int stream_size); +ZEXTERN int ZEXPORT deflateInit2_(z_streamp strm, int level, int method, + int windowBits, int memLevel, + int strategy, const char *version, + int stream_size); +ZEXTERN int ZEXPORT inflateInit2_(z_streamp strm, int windowBits, + const char *version, int stream_size); +ZEXTERN int ZEXPORT inflateBackInit_(z_streamp strm, int windowBits, + unsigned char FAR *window, + const char *version, + int stream_size); +#ifdef Z_PREFIX_SET +# define z_deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define z_inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define z_inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#else +# define deflateInit(strm, level) \ + deflateInit_((strm), (level), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit(strm) \ + inflateInit_((strm), ZLIB_VERSION, (int)sizeof(z_stream)) +# define deflateInit2(strm, level, method, windowBits, memLevel, strategy) \ + deflateInit2_((strm),(level),(method),(windowBits),(memLevel),\ + (strategy), ZLIB_VERSION, (int)sizeof(z_stream)) +# define inflateInit2(strm, windowBits) \ + inflateInit2_((strm), (windowBits), ZLIB_VERSION, \ + (int)sizeof(z_stream)) +# define inflateBackInit(strm, windowBits, window) \ + inflateBackInit_((strm), (windowBits), (window), \ + ZLIB_VERSION, (int)sizeof(z_stream)) +#endif + +#ifndef Z_SOLO + +/* gzgetc() macro and its supporting function and exposed data structure. Note + * that the real internal state is much larger than the exposed structure. + * This abbreviated structure exposes just enough for the gzgetc() macro. The + * user should not mess with these exposed elements, since their names or + * behavior could change in the future, perhaps even capriciously. They can + * only be used by the gzgetc() macro. You have been warned. + */ +struct gzFile_s { + unsigned have; + unsigned char *next; + z_off64_t pos; +}; +ZEXTERN int ZEXPORT gzgetc_(gzFile file); /* backward compatibility */ +#ifdef Z_PREFIX_SET +# undef z_gzgetc +# define z_gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#else +# define gzgetc(g) \ + ((g)->have ? ((g)->have--, (g)->pos++, *((g)->next)++) : (gzgetc)(g)) +#endif + +/* provide 64-bit offset functions if _LARGEFILE64_SOURCE defined, and/or + * change the regular functions to 64 bits if _FILE_OFFSET_BITS is 64 (if + * both are true, the application gets the *64 functions, and the regular + * functions are changed to 64 bits) -- in case these are set on systems + * without large file support, _LFS64_LARGEFILE must also be true + */ +#ifdef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off64_t ZEXPORT gzseek64(gzFile, z_off64_t, int); + ZEXTERN z_off64_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off64_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off64_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off64_t); +#endif + +#if !defined(ZLIB_INTERNAL) && defined(Z_WANT64) +# ifdef Z_PREFIX_SET +# define z_gzopen z_gzopen64 +# define z_gzseek z_gzseek64 +# define z_gztell z_gztell64 +# define z_gzoffset z_gzoffset64 +# define z_adler32_combine z_adler32_combine64 +# define z_crc32_combine z_crc32_combine64 +# define z_crc32_combine_gen z_crc32_combine_gen64 +# else +# define gzopen gzopen64 +# define gzseek gzseek64 +# define gztell gztell64 +# define gzoffset gzoffset64 +# define adler32_combine adler32_combine64 +# define crc32_combine crc32_combine64 +# define crc32_combine_gen crc32_combine_gen64 +# endif +# ifndef Z_LARGE64 + ZEXTERN gzFile ZEXPORT gzopen64(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek64(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell64(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset64(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine64(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen64(z_off_t); +# endif +#else + ZEXTERN gzFile ZEXPORT gzopen(const char *, const char *); + ZEXTERN z_off_t ZEXPORT gzseek(gzFile, z_off_t, int); + ZEXTERN z_off_t ZEXPORT gztell(gzFile); + ZEXTERN z_off_t ZEXPORT gzoffset(gzFile); + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); +#endif + +#else /* Z_SOLO */ + + ZEXTERN uLong ZEXPORT adler32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine(uLong, uLong, z_off_t); + ZEXTERN uLong ZEXPORT crc32_combine_gen(z_off_t); + +#endif /* !Z_SOLO */ + +/* undocumented functions */ +ZEXTERN const char * ZEXPORT zError(int); +ZEXTERN int ZEXPORT inflateSyncPoint(z_streamp); +ZEXTERN const z_crc_t FAR * ZEXPORT get_crc_table(void); +ZEXTERN int ZEXPORT inflateUndermine(z_streamp, int); +ZEXTERN int ZEXPORT inflateValidate(z_streamp, int); +ZEXTERN unsigned long ZEXPORT inflateCodesUsed(z_streamp); +ZEXTERN int ZEXPORT inflateResetKeep(z_streamp); +ZEXTERN int ZEXPORT deflateResetKeep(z_streamp); +#if defined(_WIN32) && !defined(Z_SOLO) +ZEXTERN gzFile ZEXPORT gzopen_w(const wchar_t *path, + const char *mode); +#endif +#if defined(STDC) || defined(Z_HAVE_STDARG_H) +# ifndef Z_SOLO +ZEXTERN int ZEXPORTVA gzvprintf(gzFile file, + const char *format, + va_list va); +# endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* ZLIB_H */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd new file mode 100644 index 00000000..31dcc406 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/htmlparser.pxd @@ -0,0 +1,56 @@ +from libc.string cimport const_char + +from lxml.includes.tree cimport xmlDoc +from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback +from lxml.includes.xmlparser cimport xmlParserCtxt, xmlSAXHandler, xmlSAXHandlerV1 + +cdef extern from "libxml/HTMLparser.h" nogil: + ctypedef enum htmlParserOption: + HTML_PARSE_NOERROR # suppress error reports + HTML_PARSE_NOWARNING # suppress warning reports + HTML_PARSE_PEDANTIC # pedantic error reporting + HTML_PARSE_NOBLANKS # remove blank nodes + HTML_PARSE_NONET # Forbid network access + # libxml2 2.6.21+ only: + HTML_PARSE_RECOVER # Relaxed parsing + HTML_PARSE_COMPACT # compact small text nodes + # libxml2 2.7.7+ only: + HTML_PARSE_NOIMPLIED # Do not add implied html/body... elements + # libxml2 2.7.8+ only: + HTML_PARSE_NODEFDTD # do not default a doctype if not found + # libxml2 2.8.0+ only: + XML_PARSE_IGNORE_ENC # ignore internal document encoding hint + + xmlSAXHandlerV1 htmlDefaultSAXHandler + + cdef xmlParserCtxt* htmlCreateMemoryParserCtxt( + char* buffer, int size) + cdef xmlParserCtxt* htmlCreateFileParserCtxt( + char* filename, char* encoding) + cdef xmlParserCtxt* htmlCreatePushParserCtxt(xmlSAXHandler* sax, + void* user_data, + char* chunk, int size, + char* filename, int enc) + cdef void htmlFreeParserCtxt(xmlParserCtxt* ctxt) + cdef void htmlCtxtReset(xmlParserCtxt* ctxt) + cdef int htmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) + cdef int htmlParseDocument(xmlParserCtxt* ctxt) + cdef int htmlParseChunk(xmlParserCtxt* ctxt, + char* chunk, int size, int terminate) + + cdef xmlDoc* htmlCtxtReadFile(xmlParserCtxt* ctxt, + char* filename, const_char* encoding, + int options) + cdef xmlDoc* htmlCtxtReadDoc(xmlParserCtxt* ctxt, + char* buffer, char* URL, const_char* encoding, + int options) + cdef xmlDoc* htmlCtxtReadIO(xmlParserCtxt* ctxt, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void* ioctx, + char* URL, const_char* encoding, + int options) + cdef xmlDoc* htmlCtxtReadMemory(xmlParserCtxt* ctxt, + char* buffer, int size, + char* filename, const_char* encoding, + int options) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/__init__.py b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/__init__.py diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exslt.h b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exslt.h new file mode 100644 index 00000000..dfbd09be --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exslt.h @@ -0,0 +1,108 @@ +/* + * Summary: main header file + * + * Copy: See Copyright for the status of this software. + */ + + +#ifndef __EXSLT_H__ +#define __EXSLT_H__ + +#include <libxml/tree.h> +#include <libxml/xpath.h> +#include "exsltexports.h" +#include <libexslt/exsltconfig.h> + +#ifdef __cplusplus +extern "C" { +#endif + +EXSLTPUBVAR const char *exsltLibraryVersion; +EXSLTPUBVAR const int exsltLibexsltVersion; +EXSLTPUBVAR const int exsltLibxsltVersion; +EXSLTPUBVAR const int exsltLibxmlVersion; + +/** + * EXSLT_COMMON_NAMESPACE: + * + * Namespace for EXSLT common functions + */ +#define EXSLT_COMMON_NAMESPACE ((const xmlChar *) "http://exslt.org/common") +/** + * EXSLT_CRYPTO_NAMESPACE: + * + * Namespace for EXSLT crypto functions + */ +#define EXSLT_CRYPTO_NAMESPACE ((const xmlChar *) "http://exslt.org/crypto") +/** + * EXSLT_MATH_NAMESPACE: + * + * Namespace for EXSLT math functions + */ +#define EXSLT_MATH_NAMESPACE ((const xmlChar *) "http://exslt.org/math") +/** + * EXSLT_SETS_NAMESPACE: + * + * Namespace for EXSLT set functions + */ +#define EXSLT_SETS_NAMESPACE ((const xmlChar *) "http://exslt.org/sets") +/** + * EXSLT_FUNCTIONS_NAMESPACE: + * + * Namespace for EXSLT functions extension functions + */ +#define EXSLT_FUNCTIONS_NAMESPACE ((const xmlChar *) "http://exslt.org/functions") +/** + * EXSLT_STRINGS_NAMESPACE: + * + * Namespace for EXSLT strings functions + */ +#define EXSLT_STRINGS_NAMESPACE ((const xmlChar *) "http://exslt.org/strings") +/** + * EXSLT_DATE_NAMESPACE: + * + * Namespace for EXSLT date functions + */ +#define EXSLT_DATE_NAMESPACE ((const xmlChar *) "http://exslt.org/dates-and-times") +/** + * EXSLT_DYNAMIC_NAMESPACE: + * + * Namespace for EXSLT dynamic functions + */ +#define EXSLT_DYNAMIC_NAMESPACE ((const xmlChar *) "http://exslt.org/dynamic") + +/** + * SAXON_NAMESPACE: + * + * Namespace for SAXON extensions functions + */ +#define SAXON_NAMESPACE ((const xmlChar *) "http://icl.com/saxon") + +EXSLTPUBFUN void EXSLTCALL exsltCommonRegister (void); +#ifdef EXSLT_CRYPTO_ENABLED +EXSLTPUBFUN void EXSLTCALL exsltCryptoRegister (void); +#endif +EXSLTPUBFUN void EXSLTCALL exsltMathRegister (void); +EXSLTPUBFUN void EXSLTCALL exsltSetsRegister (void); +EXSLTPUBFUN void EXSLTCALL exsltFuncRegister (void); +EXSLTPUBFUN void EXSLTCALL exsltStrRegister (void); +EXSLTPUBFUN void EXSLTCALL exsltDateRegister (void); +EXSLTPUBFUN void EXSLTCALL exsltSaxonRegister (void); +EXSLTPUBFUN void EXSLTCALL exsltDynRegister(void); + +EXSLTPUBFUN void EXSLTCALL exsltRegisterAll (void); + +EXSLTPUBFUN int EXSLTCALL exsltDateXpathCtxtRegister (xmlXPathContextPtr ctxt, + const xmlChar *prefix); +EXSLTPUBFUN int EXSLTCALL exsltMathXpathCtxtRegister (xmlXPathContextPtr ctxt, + const xmlChar *prefix); +EXSLTPUBFUN int EXSLTCALL exsltSetsXpathCtxtRegister (xmlXPathContextPtr ctxt, + const xmlChar *prefix); +EXSLTPUBFUN int EXSLTCALL exsltStrXpathCtxtRegister (xmlXPathContextPtr ctxt, + const xmlChar *prefix); + +#ifdef __cplusplus +} +#endif +#endif /* __EXSLT_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exsltconfig.h b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exsltconfig.h new file mode 100644 index 00000000..62aa98cb --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exsltconfig.h @@ -0,0 +1,70 @@ +/* + * exsltconfig.h: compile-time version information for the EXSLT library + * + * See Copyright for the status of this software. + * + * daniel@veillard.com + */ + +#ifndef __XML_EXSLTCONFIG_H__ +#define __XML_EXSLTCONFIG_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * LIBEXSLT_DOTTED_VERSION: + * + * the version string like "1.2.3" + */ +#define LIBEXSLT_DOTTED_VERSION "0.8.23" + +/** + * LIBEXSLT_VERSION: + * + * the version number: 1.2.3 value is 10203 + */ +#define LIBEXSLT_VERSION 823 + +/** + * LIBEXSLT_VERSION_STRING: + * + * the version number string, 1.2.3 value is "10203" + */ +#define LIBEXSLT_VERSION_STRING "823" + +/** + * LIBEXSLT_VERSION_EXTRA: + * + * extra version information, used to show a Git commit description + */ +#define LIBEXSLT_VERSION_EXTRA "" + +/** + * WITH_CRYPTO: + * + * Whether crypto support is configured into exslt + */ +#if 0 +#define EXSLT_CRYPTO_ENABLED +#endif + +/** + * ATTRIBUTE_UNUSED: + * + * This macro is used to flag unused function parameters to GCC + */ +#ifdef __GNUC__ +#ifndef ATTRIBUTE_UNUSED +#define ATTRIBUTE_UNUSED __attribute__((unused)) +#endif +#else +#define ATTRIBUTE_UNUSED +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_EXSLTCONFIG_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exsltexports.h b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exsltexports.h new file mode 100644 index 00000000..ee79ec7a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libexslt/exsltexports.h @@ -0,0 +1,63 @@ +/* + * Summary: macros for marking symbols as exportable/importable. + * + * Copy: See Copyright for the status of this software. + */ + +#ifndef __EXSLT_EXPORTS_H__ +#define __EXSLT_EXPORTS_H__ + +#if defined(_WIN32) || defined(__CYGWIN__) +/** DOC_DISABLE */ + +#ifdef LIBEXSLT_STATIC + #define EXSLTPUBLIC +#elif defined(IN_LIBEXSLT) + #define EXSLTPUBLIC __declspec(dllexport) +#else + #define EXSLTPUBLIC __declspec(dllimport) +#endif + +#define EXSLTCALL __cdecl + +/** DOC_ENABLE */ +#else /* not Windows */ + +/** + * EXSLTPUBLIC: + * + * Macro which declares a public symbol + */ +#define EXSLTPUBLIC + +/** + * EXSLTCALL: + * + * Macro which declares the calling convention for exported functions + */ +#define EXSLTCALL + +#endif /* platform switch */ + +/* + * EXSLTPUBFUN: + * + * Macro which declares an exportable function + */ +#define EXSLTPUBFUN EXSLTPUBLIC + +/** + * EXSLTPUBVAR: + * + * Macro which declares an exportable variable + */ +#define EXSLTPUBVAR EXSLTPUBLIC extern + +/* Compatibility */ +#if !defined(LIBEXSLT_PUBLIC) +#define LIBEXSLT_PUBLIC EXSLTPUBVAR +#endif + +#endif /* __EXSLT_EXPORTS_H__ */ + + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/HTMLparser.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/HTMLparser.h new file mode 100644 index 00000000..e16d7749 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/HTMLparser.h @@ -0,0 +1,343 @@ +/* + * Summary: interface for an HTML 4.0 non-verifying parser + * Description: this module implements an HTML 4.0 non-verifying parser + * with API compatible with the XML parser ones. It should + * be able to parse "real world" HTML, even if severely + * broken from a specification point of view. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __HTML_PARSER_H__ +#define __HTML_PARSER_H__ +#include <libxml/xmlversion.h> +#include <libxml/parser.h> + +#ifdef LIBXML_HTML_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Most of the back-end structures from XML and HTML are shared. + */ +typedef xmlParserCtxt htmlParserCtxt; +typedef xmlParserCtxtPtr htmlParserCtxtPtr; +typedef xmlParserNodeInfo htmlParserNodeInfo; +typedef xmlSAXHandler htmlSAXHandler; +typedef xmlSAXHandlerPtr htmlSAXHandlerPtr; +typedef xmlParserInput htmlParserInput; +typedef xmlParserInputPtr htmlParserInputPtr; +typedef xmlDocPtr htmlDocPtr; +typedef xmlNodePtr htmlNodePtr; + +/* + * Internal description of an HTML element, representing HTML 4.01 + * and XHTML 1.0 (which share the same structure). + */ +typedef struct _htmlElemDesc htmlElemDesc; +typedef htmlElemDesc *htmlElemDescPtr; +struct _htmlElemDesc { + const char *name; /* The tag name */ + char startTag; /* Whether the start tag can be implied */ + char endTag; /* Whether the end tag can be implied */ + char saveEndTag; /* Whether the end tag should be saved */ + char empty; /* Is this an empty element ? */ + char depr; /* Is this a deprecated element ? */ + char dtd; /* 1: only in Loose DTD, 2: only Frameset one */ + char isinline; /* is this a block 0 or inline 1 element */ + const char *desc; /* the description */ + +/* NRK Jan.2003 + * New fields encapsulating HTML structure + * + * Bugs: + * This is a very limited representation. It fails to tell us when + * an element *requires* subelements (we only have whether they're + * allowed or not), and it doesn't tell us where CDATA and PCDATA + * are allowed. Some element relationships are not fully represented: + * these are flagged with the word MODIFIER + */ + const char** subelts; /* allowed sub-elements of this element */ + const char* defaultsubelt; /* subelement for suggested auto-repair + if necessary or NULL */ + const char** attrs_opt; /* Optional Attributes */ + const char** attrs_depr; /* Additional deprecated attributes */ + const char** attrs_req; /* Required attributes */ +}; + +/* + * Internal description of an HTML entity. + */ +typedef struct _htmlEntityDesc htmlEntityDesc; +typedef htmlEntityDesc *htmlEntityDescPtr; +struct _htmlEntityDesc { + unsigned int value; /* the UNICODE value for the character */ + const char *name; /* The entity name */ + const char *desc; /* the description */ +}; + +/** DOC_DISABLE */ +#ifdef LIBXML_SAX1_ENABLED + #define XML_GLOBALS_HTML \ + XML_OP(htmlDefaultSAXHandler, xmlSAXHandlerV1, XML_DEPRECATED) +#else + #define XML_GLOBALS_HTML +#endif + +#define XML_OP XML_DECLARE_GLOBAL +XML_GLOBALS_HTML +#undef XML_OP + +#if defined(LIBXML_THREAD_ENABLED) && !defined(XML_GLOBALS_NO_REDEFINITION) + #define htmlDefaultSAXHandler XML_GLOBAL_MACRO(htmlDefaultSAXHandler) +#endif +/** DOC_ENABLE */ + +/* + * There is only few public functions. + */ +XML_DEPRECATED +XMLPUBFUN void + htmlInitAutoClose (void); +XMLPUBFUN const htmlElemDesc * + htmlTagLookup (const xmlChar *tag); +XMLPUBFUN const htmlEntityDesc * + htmlEntityLookup(const xmlChar *name); +XMLPUBFUN const htmlEntityDesc * + htmlEntityValueLookup(unsigned int value); + +XMLPUBFUN int + htmlIsAutoClosed(htmlDocPtr doc, + htmlNodePtr elem); +XMLPUBFUN int + htmlAutoCloseTag(htmlDocPtr doc, + const xmlChar *name, + htmlNodePtr elem); +XML_DEPRECATED +XMLPUBFUN const htmlEntityDesc * + htmlParseEntityRef(htmlParserCtxtPtr ctxt, + const xmlChar **str); +XML_DEPRECATED +XMLPUBFUN int + htmlParseCharRef(htmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + htmlParseElement(htmlParserCtxtPtr ctxt); + +XMLPUBFUN htmlParserCtxtPtr + htmlNewParserCtxt(void); +XMLPUBFUN htmlParserCtxtPtr + htmlNewSAXParserCtxt(const htmlSAXHandler *sax, + void *userData); + +XMLPUBFUN htmlParserCtxtPtr + htmlCreateMemoryParserCtxt(const char *buffer, + int size); + +XMLPUBFUN int + htmlParseDocument(htmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN htmlDocPtr + htmlSAXParseDoc (const xmlChar *cur, + const char *encoding, + htmlSAXHandlerPtr sax, + void *userData); +XMLPUBFUN htmlDocPtr + htmlParseDoc (const xmlChar *cur, + const char *encoding); +XMLPUBFUN htmlParserCtxtPtr + htmlCreateFileParserCtxt(const char *filename, + const char *encoding); +XML_DEPRECATED +XMLPUBFUN htmlDocPtr + htmlSAXParseFile(const char *filename, + const char *encoding, + htmlSAXHandlerPtr sax, + void *userData); +XMLPUBFUN htmlDocPtr + htmlParseFile (const char *filename, + const char *encoding); +XMLPUBFUN int + UTF8ToHtml (unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen); +XMLPUBFUN int + htmlEncodeEntities(unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen, int quoteChar); +XMLPUBFUN int + htmlIsScriptAttribute(const xmlChar *name); +XMLPUBFUN int + htmlHandleOmittedElem(int val); + +#ifdef LIBXML_PUSH_ENABLED +/** + * Interfaces for the Push mode. + */ +XMLPUBFUN htmlParserCtxtPtr + htmlCreatePushParserCtxt(htmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename, + xmlCharEncoding enc); +XMLPUBFUN int + htmlParseChunk (htmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); +#endif /* LIBXML_PUSH_ENABLED */ + +XMLPUBFUN void + htmlFreeParserCtxt (htmlParserCtxtPtr ctxt); + +/* + * New set of simpler/more flexible APIs + */ +/** + * xmlParserOption: + * + * This is the set of XML parser options that can be passed down + * to the xmlReadDoc() and similar calls. + */ +typedef enum { + HTML_PARSE_RECOVER = 1<<0, /* Relaxed parsing */ + HTML_PARSE_NODEFDTD = 1<<2, /* do not default a doctype if not found */ + HTML_PARSE_NOERROR = 1<<5, /* suppress error reports */ + HTML_PARSE_NOWARNING= 1<<6, /* suppress warning reports */ + HTML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ + HTML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ + HTML_PARSE_NONET = 1<<11,/* Forbid network access */ + HTML_PARSE_NOIMPLIED= 1<<13,/* Do not add implied html/body... elements */ + HTML_PARSE_COMPACT = 1<<16,/* compact small text nodes */ + HTML_PARSE_IGNORE_ENC=1<<21 /* ignore internal document encoding hint */ +} htmlParserOption; + +XMLPUBFUN void + htmlCtxtReset (htmlParserCtxtPtr ctxt); +XMLPUBFUN int + htmlCtxtUseOptions (htmlParserCtxtPtr ctxt, + int options); +XMLPUBFUN htmlDocPtr + htmlReadDoc (const xmlChar *cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlReadFile (const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlReadMemory (const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlReadFd (int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlReadIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlCtxtReadDoc (xmlParserCtxtPtr ctxt, + const xmlChar *cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlCtxtReadFile (xmlParserCtxtPtr ctxt, + const char *filename, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlCtxtReadMemory (xmlParserCtxtPtr ctxt, + const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlCtxtReadFd (xmlParserCtxtPtr ctxt, + int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN htmlDocPtr + htmlCtxtReadIO (xmlParserCtxtPtr ctxt, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); + +/* NRK/Jan2003: further knowledge of HTML structure + */ +typedef enum { + HTML_NA = 0 , /* something we don't check at all */ + HTML_INVALID = 0x1 , + HTML_DEPRECATED = 0x2 , + HTML_VALID = 0x4 , + HTML_REQUIRED = 0xc /* VALID bit set so ( & HTML_VALID ) is TRUE */ +} htmlStatus ; + +/* Using htmlElemDesc rather than name here, to emphasise the fact + that otherwise there's a lookup overhead +*/ +XMLPUBFUN htmlStatus htmlAttrAllowed(const htmlElemDesc*, const xmlChar*, int) ; +XMLPUBFUN int htmlElementAllowedHere(const htmlElemDesc*, const xmlChar*) ; +XMLPUBFUN htmlStatus htmlElementStatusHere(const htmlElemDesc*, const htmlElemDesc*) ; +XMLPUBFUN htmlStatus htmlNodeStatus(const htmlNodePtr, int) ; +/** + * htmlDefaultSubelement: + * @elt: HTML element + * + * Returns the default subelement for this element + */ +#define htmlDefaultSubelement(elt) elt->defaultsubelt +/** + * htmlElementAllowedHereDesc: + * @parent: HTML parent element + * @elt: HTML element + * + * Checks whether an HTML element description may be a + * direct child of the specified element. + * + * Returns 1 if allowed; 0 otherwise. + */ +#define htmlElementAllowedHereDesc(parent,elt) \ + htmlElementAllowedHere((parent), (elt)->name) +/** + * htmlRequiredAttrs: + * @elt: HTML element + * + * Returns the attributes required for the specified element. + */ +#define htmlRequiredAttrs(elt) (elt)->attrs_req + + +#ifdef __cplusplus +} +#endif + +#else /* LIBXML_HTML_ENABLED */ + +/** DOC_DISABLE */ +#define XML_GLOBALS_HTML +/** DOC_ENABLE */ + +#endif /* LIBXML_HTML_ENABLED */ +#endif /* __HTML_PARSER_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/HTMLtree.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/HTMLtree.h new file mode 100644 index 00000000..8e1ba90e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/HTMLtree.h @@ -0,0 +1,147 @@ +/* + * Summary: specific APIs to process HTML tree, especially serialization + * Description: this module implements a few function needed to process + * tree in an HTML specific way. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __HTML_TREE_H__ +#define __HTML_TREE_H__ + +#include <stdio.h> +#include <libxml/xmlversion.h> +#include <libxml/tree.h> +#include <libxml/HTMLparser.h> + +#ifdef LIBXML_HTML_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * HTML_TEXT_NODE: + * + * Macro. A text node in a HTML document is really implemented + * the same way as a text node in an XML document. + */ +#define HTML_TEXT_NODE XML_TEXT_NODE +/** + * HTML_ENTITY_REF_NODE: + * + * Macro. An entity reference in a HTML document is really implemented + * the same way as an entity reference in an XML document. + */ +#define HTML_ENTITY_REF_NODE XML_ENTITY_REF_NODE +/** + * HTML_COMMENT_NODE: + * + * Macro. A comment in a HTML document is really implemented + * the same way as a comment in an XML document. + */ +#define HTML_COMMENT_NODE XML_COMMENT_NODE +/** + * HTML_PRESERVE_NODE: + * + * Macro. A preserved node in a HTML document is really implemented + * the same way as a CDATA section in an XML document. + */ +#define HTML_PRESERVE_NODE XML_CDATA_SECTION_NODE +/** + * HTML_PI_NODE: + * + * Macro. A processing instruction in a HTML document is really implemented + * the same way as a processing instruction in an XML document. + */ +#define HTML_PI_NODE XML_PI_NODE + +XMLPUBFUN htmlDocPtr + htmlNewDoc (const xmlChar *URI, + const xmlChar *ExternalID); +XMLPUBFUN htmlDocPtr + htmlNewDocNoDtD (const xmlChar *URI, + const xmlChar *ExternalID); +XMLPUBFUN const xmlChar * + htmlGetMetaEncoding (htmlDocPtr doc); +XMLPUBFUN int + htmlSetMetaEncoding (htmlDocPtr doc, + const xmlChar *encoding); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + htmlDocDumpMemory (xmlDocPtr cur, + xmlChar **mem, + int *size); +XMLPUBFUN void + htmlDocDumpMemoryFormat (xmlDocPtr cur, + xmlChar **mem, + int *size, + int format); +XMLPUBFUN int + htmlDocDump (FILE *f, + xmlDocPtr cur); +XMLPUBFUN int + htmlSaveFile (const char *filename, + xmlDocPtr cur); +XMLPUBFUN int + htmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur); +XMLPUBFUN void + htmlNodeDumpFile (FILE *out, + xmlDocPtr doc, + xmlNodePtr cur); +XMLPUBFUN int + htmlNodeDumpFileFormat (FILE *out, + xmlDocPtr doc, + xmlNodePtr cur, + const char *encoding, + int format); +XMLPUBFUN int + htmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); +XMLPUBFUN int + htmlSaveFileFormat (const char *filename, + xmlDocPtr cur, + const char *encoding, + int format); + +XMLPUBFUN void + htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + const char *encoding, + int format); +XMLPUBFUN void + htmlDocContentDumpOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); +XMLPUBFUN void + htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding, + int format); +XMLPUBFUN void + htmlNodeDumpOutput (xmlOutputBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + const char *encoding); + +#endif /* LIBXML_OUTPUT_ENABLED */ + +XMLPUBFUN int + htmlIsBooleanAttr (const xmlChar *name); + + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_HTML_ENABLED */ + +#endif /* __HTML_TREE_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/SAX.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/SAX.h new file mode 100644 index 00000000..eea1057b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/SAX.h @@ -0,0 +1,202 @@ +/* + * Summary: Old SAX version 1 handler, deprecated + * Description: DEPRECATED set of SAX version 1 interfaces used to + * build the DOM tree. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __XML_SAX_H__ +#define __XML_SAX_H__ + +#include <libxml/xmlversion.h> +#include <libxml/parser.h> + +#ifdef LIBXML_LEGACY_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif +XML_DEPRECATED +XMLPUBFUN const xmlChar * + getPublicId (void *ctx); +XML_DEPRECATED +XMLPUBFUN const xmlChar * + getSystemId (void *ctx); +XML_DEPRECATED +XMLPUBFUN void + setDocumentLocator (void *ctx, + xmlSAXLocatorPtr loc); + +XML_DEPRECATED +XMLPUBFUN int + getLineNumber (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + getColumnNumber (void *ctx); + +XML_DEPRECATED +XMLPUBFUN int + isStandalone (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + hasInternalSubset (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + hasExternalSubset (void *ctx); + +XML_DEPRECATED +XMLPUBFUN void + internalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +XML_DEPRECATED +XMLPUBFUN void + externalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +XML_DEPRECATED +XMLPUBFUN xmlEntityPtr + getEntity (void *ctx, + const xmlChar *name); +XML_DEPRECATED +XMLPUBFUN xmlEntityPtr + getParameterEntity (void *ctx, + const xmlChar *name); +XML_DEPRECATED +XMLPUBFUN xmlParserInputPtr + resolveEntity (void *ctx, + const xmlChar *publicId, + const xmlChar *systemId); + +XML_DEPRECATED +XMLPUBFUN void + entityDecl (void *ctx, + const xmlChar *name, + int type, + const xmlChar *publicId, + const xmlChar *systemId, + xmlChar *content); +XML_DEPRECATED +XMLPUBFUN void + attributeDecl (void *ctx, + const xmlChar *elem, + const xmlChar *fullname, + int type, + int def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +XML_DEPRECATED +XMLPUBFUN void + elementDecl (void *ctx, + const xmlChar *name, + int type, + xmlElementContentPtr content); +XML_DEPRECATED +XMLPUBFUN void + notationDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId); +XML_DEPRECATED +XMLPUBFUN void + unparsedEntityDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId, + const xmlChar *notationName); + +XML_DEPRECATED +XMLPUBFUN void + startDocument (void *ctx); +XML_DEPRECATED +XMLPUBFUN void + endDocument (void *ctx); +XML_DEPRECATED +XMLPUBFUN void + attribute (void *ctx, + const xmlChar *fullname, + const xmlChar *value); +XML_DEPRECATED +XMLPUBFUN void + startElement (void *ctx, + const xmlChar *fullname, + const xmlChar **atts); +XML_DEPRECATED +XMLPUBFUN void + endElement (void *ctx, + const xmlChar *name); +XML_DEPRECATED +XMLPUBFUN void + reference (void *ctx, + const xmlChar *name); +XML_DEPRECATED +XMLPUBFUN void + characters (void *ctx, + const xmlChar *ch, + int len); +XML_DEPRECATED +XMLPUBFUN void + ignorableWhitespace (void *ctx, + const xmlChar *ch, + int len); +XML_DEPRECATED +XMLPUBFUN void + processingInstruction (void *ctx, + const xmlChar *target, + const xmlChar *data); +XML_DEPRECATED +XMLPUBFUN void + globalNamespace (void *ctx, + const xmlChar *href, + const xmlChar *prefix); +XML_DEPRECATED +XMLPUBFUN void + setNamespace (void *ctx, + const xmlChar *name); +XML_DEPRECATED +XMLPUBFUN xmlNsPtr + getNamespace (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + checkNamespace (void *ctx, + xmlChar *nameSpace); +XML_DEPRECATED +XMLPUBFUN void + namespaceDecl (void *ctx, + const xmlChar *href, + const xmlChar *prefix); +XML_DEPRECATED +XMLPUBFUN void + comment (void *ctx, + const xmlChar *value); +XML_DEPRECATED +XMLPUBFUN void + cdataBlock (void *ctx, + const xmlChar *value, + int len); + +#ifdef LIBXML_SAX1_ENABLED +XML_DEPRECATED +XMLPUBFUN void + initxmlDefaultSAXHandler (xmlSAXHandlerV1 *hdlr, + int warning); +#ifdef LIBXML_HTML_ENABLED +XML_DEPRECATED +XMLPUBFUN void + inithtmlDefaultSAXHandler (xmlSAXHandlerV1 *hdlr); +#endif +#endif /* LIBXML_SAX1_ENABLED */ + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_LEGACY_ENABLED */ + +#endif /* __XML_SAX_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/SAX2.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/SAX2.h new file mode 100644 index 00000000..4c4ecce8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/SAX2.h @@ -0,0 +1,171 @@ +/* + * Summary: SAX2 parser interface used to build the DOM tree + * Description: those are the default SAX2 interfaces used by + * the library when building DOM tree. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __XML_SAX2_H__ +#define __XML_SAX2_H__ + +#include <libxml/xmlversion.h> +#include <libxml/parser.h> + +#ifdef __cplusplus +extern "C" { +#endif +XMLPUBFUN const xmlChar * + xmlSAX2GetPublicId (void *ctx); +XMLPUBFUN const xmlChar * + xmlSAX2GetSystemId (void *ctx); +XMLPUBFUN void + xmlSAX2SetDocumentLocator (void *ctx, + xmlSAXLocatorPtr loc); + +XMLPUBFUN int + xmlSAX2GetLineNumber (void *ctx); +XMLPUBFUN int + xmlSAX2GetColumnNumber (void *ctx); + +XMLPUBFUN int + xmlSAX2IsStandalone (void *ctx); +XMLPUBFUN int + xmlSAX2HasInternalSubset (void *ctx); +XMLPUBFUN int + xmlSAX2HasExternalSubset (void *ctx); + +XMLPUBFUN void + xmlSAX2InternalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +XMLPUBFUN void + xmlSAX2ExternalSubset (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +XMLPUBFUN xmlEntityPtr + xmlSAX2GetEntity (void *ctx, + const xmlChar *name); +XMLPUBFUN xmlEntityPtr + xmlSAX2GetParameterEntity (void *ctx, + const xmlChar *name); +XMLPUBFUN xmlParserInputPtr + xmlSAX2ResolveEntity (void *ctx, + const xmlChar *publicId, + const xmlChar *systemId); + +XMLPUBFUN void + xmlSAX2EntityDecl (void *ctx, + const xmlChar *name, + int type, + const xmlChar *publicId, + const xmlChar *systemId, + xmlChar *content); +XMLPUBFUN void + xmlSAX2AttributeDecl (void *ctx, + const xmlChar *elem, + const xmlChar *fullname, + int type, + int def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +XMLPUBFUN void + xmlSAX2ElementDecl (void *ctx, + const xmlChar *name, + int type, + xmlElementContentPtr content); +XMLPUBFUN void + xmlSAX2NotationDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId); +XMLPUBFUN void + xmlSAX2UnparsedEntityDecl (void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId, + const xmlChar *notationName); + +XMLPUBFUN void + xmlSAX2StartDocument (void *ctx); +XMLPUBFUN void + xmlSAX2EndDocument (void *ctx); +#if defined(LIBXML_SAX1_ENABLED) || defined(LIBXML_HTML_ENABLED) || \ + defined(LIBXML_WRITER_ENABLED) || defined(LIBXML_LEGACY_ENABLED) +XMLPUBFUN void + xmlSAX2StartElement (void *ctx, + const xmlChar *fullname, + const xmlChar **atts); +XMLPUBFUN void + xmlSAX2EndElement (void *ctx, + const xmlChar *name); +#endif /* LIBXML_SAX1_ENABLED or LIBXML_HTML_ENABLED or LIBXML_LEGACY_ENABLED */ +XMLPUBFUN void + xmlSAX2StartElementNs (void *ctx, + const xmlChar *localname, + const xmlChar *prefix, + const xmlChar *URI, + int nb_namespaces, + const xmlChar **namespaces, + int nb_attributes, + int nb_defaulted, + const xmlChar **attributes); +XMLPUBFUN void + xmlSAX2EndElementNs (void *ctx, + const xmlChar *localname, + const xmlChar *prefix, + const xmlChar *URI); +XMLPUBFUN void + xmlSAX2Reference (void *ctx, + const xmlChar *name); +XMLPUBFUN void + xmlSAX2Characters (void *ctx, + const xmlChar *ch, + int len); +XMLPUBFUN void + xmlSAX2IgnorableWhitespace (void *ctx, + const xmlChar *ch, + int len); +XMLPUBFUN void + xmlSAX2ProcessingInstruction (void *ctx, + const xmlChar *target, + const xmlChar *data); +XMLPUBFUN void + xmlSAX2Comment (void *ctx, + const xmlChar *value); +XMLPUBFUN void + xmlSAX2CDataBlock (void *ctx, + const xmlChar *value, + int len); + +#ifdef LIBXML_SAX1_ENABLED +XML_DEPRECATED +XMLPUBFUN int + xmlSAXDefaultVersion (int version); +#endif /* LIBXML_SAX1_ENABLED */ + +XMLPUBFUN int + xmlSAXVersion (xmlSAXHandler *hdlr, + int version); +XMLPUBFUN void + xmlSAX2InitDefaultSAXHandler (xmlSAXHandler *hdlr, + int warning); +#ifdef LIBXML_HTML_ENABLED +XMLPUBFUN void + xmlSAX2InitHtmlDefaultSAXHandler(xmlSAXHandler *hdlr); +XML_DEPRECATED +XMLPUBFUN void + htmlDefaultSAXHandlerInit (void); +#endif +XML_DEPRECATED +XMLPUBFUN void + xmlDefaultSAXHandlerInit (void); +#ifdef __cplusplus +} +#endif +#endif /* __XML_SAX2_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/__init__.py b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/__init__.py diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/c14n.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/c14n.h new file mode 100644 index 00000000..f9bdf9b5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/c14n.h @@ -0,0 +1,126 @@ +/* + * Summary: Provide Canonical XML and Exclusive XML Canonicalization + * Description: the c14n modules provides a + * + * "Canonical XML" implementation + * http://www.w3.org/TR/xml-c14n + * + * and an + * + * "Exclusive XML Canonicalization" implementation + * http://www.w3.org/TR/xml-exc-c14n + + * Copy: See Copyright for the status of this software. + * + * Author: Aleksey Sanin <aleksey@aleksey.com> + */ +#ifndef __XML_C14N_H__ +#define __XML_C14N_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_C14N_ENABLED + +#include <libxml/tree.h> +#include <libxml/xpath.h> + +#ifdef __cplusplus +extern "C" { +#endif /* __cplusplus */ + +/* + * XML Canonicalization + * http://www.w3.org/TR/xml-c14n + * + * Exclusive XML Canonicalization + * http://www.w3.org/TR/xml-exc-c14n + * + * Canonical form of an XML document could be created if and only if + * a) default attributes (if any) are added to all nodes + * b) all character and parsed entity references are resolved + * In order to achieve this in libxml2 the document MUST be loaded with + * following global settings: + * + * xmlLoadExtDtdDefaultValue = XML_DETECT_IDS | XML_COMPLETE_ATTRS; + * xmlSubstituteEntitiesDefault(1); + * + * or corresponding parser context setting: + * xmlParserCtxtPtr ctxt; + * + * ... + * ctxt->loadsubset = XML_DETECT_IDS | XML_COMPLETE_ATTRS; + * ctxt->replaceEntities = 1; + * ... + */ + +/* + * xmlC14NMode: + * + * Predefined values for C14N modes + * + */ +typedef enum { + XML_C14N_1_0 = 0, /* Original C14N 1.0 spec */ + XML_C14N_EXCLUSIVE_1_0 = 1, /* Exclusive C14N 1.0 spec */ + XML_C14N_1_1 = 2 /* C14N 1.1 spec */ +} xmlC14NMode; + +XMLPUBFUN int + xmlC14NDocSaveTo (xmlDocPtr doc, + xmlNodeSetPtr nodes, + int mode, /* a xmlC14NMode */ + xmlChar **inclusive_ns_prefixes, + int with_comments, + xmlOutputBufferPtr buf); + +XMLPUBFUN int + xmlC14NDocDumpMemory (xmlDocPtr doc, + xmlNodeSetPtr nodes, + int mode, /* a xmlC14NMode */ + xmlChar **inclusive_ns_prefixes, + int with_comments, + xmlChar **doc_txt_ptr); + +XMLPUBFUN int + xmlC14NDocSave (xmlDocPtr doc, + xmlNodeSetPtr nodes, + int mode, /* a xmlC14NMode */ + xmlChar **inclusive_ns_prefixes, + int with_comments, + const char* filename, + int compression); + + +/** + * This is the core C14N function + */ +/** + * xmlC14NIsVisibleCallback: + * @user_data: user data + * @node: the current node + * @parent: the parent node + * + * Signature for a C14N callback on visible nodes + * + * Returns 1 if the node should be included + */ +typedef int (*xmlC14NIsVisibleCallback) (void* user_data, + xmlNodePtr node, + xmlNodePtr parent); + +XMLPUBFUN int + xmlC14NExecute (xmlDocPtr doc, + xmlC14NIsVisibleCallback is_visible_callback, + void* user_data, + int mode, /* a xmlC14NMode */ + xmlChar **inclusive_ns_prefixes, + int with_comments, + xmlOutputBufferPtr buf); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* LIBXML_C14N_ENABLED */ +#endif /* __XML_C14N_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/catalog.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/catalog.h new file mode 100644 index 00000000..02fa7ab2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/catalog.h @@ -0,0 +1,182 @@ +/** + * Summary: interfaces to the Catalog handling system + * Description: the catalog module implements the support for + * XML Catalogs and SGML catalogs + * + * SGML Open Technical Resolution TR9401:1997. + * http://www.jclark.com/sp/catalog.htm + * + * XML Catalogs Working Draft 06 August 2001 + * http://www.oasis-open.org/committees/entity/spec-2001-08-06.html + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_CATALOG_H__ +#define __XML_CATALOG_H__ + +#include <stdio.h> + +#include <libxml/xmlversion.h> +#include <libxml/xmlstring.h> +#include <libxml/tree.h> + +#ifdef LIBXML_CATALOG_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XML_CATALOGS_NAMESPACE: + * + * The namespace for the XML Catalogs elements. + */ +#define XML_CATALOGS_NAMESPACE \ + (const xmlChar *) "urn:oasis:names:tc:entity:xmlns:xml:catalog" +/** + * XML_CATALOG_PI: + * + * The specific XML Catalog Processing Instruction name. + */ +#define XML_CATALOG_PI \ + (const xmlChar *) "oasis-xml-catalog" + +/* + * The API is voluntarily limited to general cataloging. + */ +typedef enum { + XML_CATA_PREFER_NONE = 0, + XML_CATA_PREFER_PUBLIC = 1, + XML_CATA_PREFER_SYSTEM +} xmlCatalogPrefer; + +typedef enum { + XML_CATA_ALLOW_NONE = 0, + XML_CATA_ALLOW_GLOBAL = 1, + XML_CATA_ALLOW_DOCUMENT = 2, + XML_CATA_ALLOW_ALL = 3 +} xmlCatalogAllow; + +typedef struct _xmlCatalog xmlCatalog; +typedef xmlCatalog *xmlCatalogPtr; + +/* + * Operations on a given catalog. + */ +XMLPUBFUN xmlCatalogPtr + xmlNewCatalog (int sgml); +XMLPUBFUN xmlCatalogPtr + xmlLoadACatalog (const char *filename); +XMLPUBFUN xmlCatalogPtr + xmlLoadSGMLSuperCatalog (const char *filename); +XMLPUBFUN int + xmlConvertSGMLCatalog (xmlCatalogPtr catal); +XMLPUBFUN int + xmlACatalogAdd (xmlCatalogPtr catal, + const xmlChar *type, + const xmlChar *orig, + const xmlChar *replace); +XMLPUBFUN int + xmlACatalogRemove (xmlCatalogPtr catal, + const xmlChar *value); +XMLPUBFUN xmlChar * + xmlACatalogResolve (xmlCatalogPtr catal, + const xmlChar *pubID, + const xmlChar *sysID); +XMLPUBFUN xmlChar * + xmlACatalogResolveSystem(xmlCatalogPtr catal, + const xmlChar *sysID); +XMLPUBFUN xmlChar * + xmlACatalogResolvePublic(xmlCatalogPtr catal, + const xmlChar *pubID); +XMLPUBFUN xmlChar * + xmlACatalogResolveURI (xmlCatalogPtr catal, + const xmlChar *URI); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlACatalogDump (xmlCatalogPtr catal, + FILE *out); +#endif /* LIBXML_OUTPUT_ENABLED */ +XMLPUBFUN void + xmlFreeCatalog (xmlCatalogPtr catal); +XMLPUBFUN int + xmlCatalogIsEmpty (xmlCatalogPtr catal); + +/* + * Global operations. + */ +XMLPUBFUN void + xmlInitializeCatalog (void); +XMLPUBFUN int + xmlLoadCatalog (const char *filename); +XMLPUBFUN void + xmlLoadCatalogs (const char *paths); +XMLPUBFUN void + xmlCatalogCleanup (void); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlCatalogDump (FILE *out); +#endif /* LIBXML_OUTPUT_ENABLED */ +XMLPUBFUN xmlChar * + xmlCatalogResolve (const xmlChar *pubID, + const xmlChar *sysID); +XMLPUBFUN xmlChar * + xmlCatalogResolveSystem (const xmlChar *sysID); +XMLPUBFUN xmlChar * + xmlCatalogResolvePublic (const xmlChar *pubID); +XMLPUBFUN xmlChar * + xmlCatalogResolveURI (const xmlChar *URI); +XMLPUBFUN int + xmlCatalogAdd (const xmlChar *type, + const xmlChar *orig, + const xmlChar *replace); +XMLPUBFUN int + xmlCatalogRemove (const xmlChar *value); +XMLPUBFUN xmlDocPtr + xmlParseCatalogFile (const char *filename); +XMLPUBFUN int + xmlCatalogConvert (void); + +/* + * Strictly minimal interfaces for per-document catalogs used + * by the parser. + */ +XMLPUBFUN void + xmlCatalogFreeLocal (void *catalogs); +XMLPUBFUN void * + xmlCatalogAddLocal (void *catalogs, + const xmlChar *URL); +XMLPUBFUN xmlChar * + xmlCatalogLocalResolve (void *catalogs, + const xmlChar *pubID, + const xmlChar *sysID); +XMLPUBFUN xmlChar * + xmlCatalogLocalResolveURI(void *catalogs, + const xmlChar *URI); +/* + * Preference settings. + */ +XMLPUBFUN int + xmlCatalogSetDebug (int level); +XMLPUBFUN xmlCatalogPrefer + xmlCatalogSetDefaultPrefer(xmlCatalogPrefer prefer); +XMLPUBFUN void + xmlCatalogSetDefaults (xmlCatalogAllow allow); +XMLPUBFUN xmlCatalogAllow + xmlCatalogGetDefaults (void); + + +/* DEPRECATED interfaces */ +XMLPUBFUN const xmlChar * + xmlCatalogGetSystem (const xmlChar *sysID); +XMLPUBFUN const xmlChar * + xmlCatalogGetPublic (const xmlChar *pubID); + +#ifdef __cplusplus +} +#endif +#endif /* LIBXML_CATALOG_ENABLED */ +#endif /* __XML_CATALOG_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/chvalid.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/chvalid.h new file mode 100644 index 00000000..8225c95e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/chvalid.h @@ -0,0 +1,230 @@ +/* + * Summary: Unicode character range checking + * Description: this module exports interfaces for the character + * range validation APIs + * + * This file is automatically generated from the cvs source + * definition files using the genChRanges.py Python script + * + * Generation date: Mon Mar 27 11:09:48 2006 + * Sources: chvalid.def + * Author: William Brack <wbrack@mmm.com.hk> + */ + +#ifndef __XML_CHVALID_H__ +#define __XML_CHVALID_H__ + +#include <libxml/xmlversion.h> +#include <libxml/xmlstring.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Define our typedefs and structures + * + */ +typedef struct _xmlChSRange xmlChSRange; +typedef xmlChSRange *xmlChSRangePtr; +struct _xmlChSRange { + unsigned short low; + unsigned short high; +}; + +typedef struct _xmlChLRange xmlChLRange; +typedef xmlChLRange *xmlChLRangePtr; +struct _xmlChLRange { + unsigned int low; + unsigned int high; +}; + +typedef struct _xmlChRangeGroup xmlChRangeGroup; +typedef xmlChRangeGroup *xmlChRangeGroupPtr; +struct _xmlChRangeGroup { + int nbShortRange; + int nbLongRange; + const xmlChSRange *shortRange; /* points to an array of ranges */ + const xmlChLRange *longRange; +}; + +/** + * Range checking routine + */ +XMLPUBFUN int + xmlCharInRange(unsigned int val, const xmlChRangeGroup *group); + + +/** + * xmlIsBaseChar_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBaseChar_ch(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \ + ((0x61 <= (c)) && ((c) <= 0x7a)) || \ + ((0xc0 <= (c)) && ((c) <= 0xd6)) || \ + ((0xd8 <= (c)) && ((c) <= 0xf6)) || \ + (0xf8 <= (c))) + +/** + * xmlIsBaseCharQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBaseCharQ(c) (((c) < 0x100) ? \ + xmlIsBaseChar_ch((c)) : \ + xmlCharInRange((c), &xmlIsBaseCharGroup)) + +XMLPUBVAR const xmlChRangeGroup xmlIsBaseCharGroup; + +/** + * xmlIsBlank_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBlank_ch(c) (((c) == 0x20) || \ + ((0x9 <= (c)) && ((c) <= 0xa)) || \ + ((c) == 0xd)) + +/** + * xmlIsBlankQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsBlankQ(c) (((c) < 0x100) ? \ + xmlIsBlank_ch((c)) : 0) + + +/** + * xmlIsChar_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsChar_ch(c) (((0x9 <= (c)) && ((c) <= 0xa)) || \ + ((c) == 0xd) || \ + (0x20 <= (c))) + +/** + * xmlIsCharQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsCharQ(c) (((c) < 0x100) ? \ + xmlIsChar_ch((c)) :\ + (((0x100 <= (c)) && ((c) <= 0xd7ff)) || \ + ((0xe000 <= (c)) && ((c) <= 0xfffd)) || \ + ((0x10000 <= (c)) && ((c) <= 0x10ffff)))) + +XMLPUBVAR const xmlChRangeGroup xmlIsCharGroup; + +/** + * xmlIsCombiningQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsCombiningQ(c) (((c) < 0x100) ? \ + 0 : \ + xmlCharInRange((c), &xmlIsCombiningGroup)) + +XMLPUBVAR const xmlChRangeGroup xmlIsCombiningGroup; + +/** + * xmlIsDigit_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsDigit_ch(c) (((0x30 <= (c)) && ((c) <= 0x39))) + +/** + * xmlIsDigitQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsDigitQ(c) (((c) < 0x100) ? \ + xmlIsDigit_ch((c)) : \ + xmlCharInRange((c), &xmlIsDigitGroup)) + +XMLPUBVAR const xmlChRangeGroup xmlIsDigitGroup; + +/** + * xmlIsExtender_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsExtender_ch(c) (((c) == 0xb7)) + +/** + * xmlIsExtenderQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsExtenderQ(c) (((c) < 0x100) ? \ + xmlIsExtender_ch((c)) : \ + xmlCharInRange((c), &xmlIsExtenderGroup)) + +XMLPUBVAR const xmlChRangeGroup xmlIsExtenderGroup; + +/** + * xmlIsIdeographicQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsIdeographicQ(c) (((c) < 0x100) ? \ + 0 :\ + (((0x4e00 <= (c)) && ((c) <= 0x9fa5)) || \ + ((c) == 0x3007) || \ + ((0x3021 <= (c)) && ((c) <= 0x3029)))) + +XMLPUBVAR const xmlChRangeGroup xmlIsIdeographicGroup; +XMLPUBVAR const unsigned char xmlIsPubidChar_tab[256]; + +/** + * xmlIsPubidChar_ch: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsPubidChar_ch(c) (xmlIsPubidChar_tab[(c)]) + +/** + * xmlIsPubidCharQ: + * @c: char to validate + * + * Automatically generated by genChRanges.py + */ +#define xmlIsPubidCharQ(c) (((c) < 0x100) ? \ + xmlIsPubidChar_ch((c)) : 0) + +XMLPUBFUN int + xmlIsBaseChar(unsigned int ch); +XMLPUBFUN int + xmlIsBlank(unsigned int ch); +XMLPUBFUN int + xmlIsChar(unsigned int ch); +XMLPUBFUN int + xmlIsCombining(unsigned int ch); +XMLPUBFUN int + xmlIsDigit(unsigned int ch); +XMLPUBFUN int + xmlIsExtender(unsigned int ch); +XMLPUBFUN int + xmlIsIdeographic(unsigned int ch); +XMLPUBFUN int + xmlIsPubidChar(unsigned int ch); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_CHVALID_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/debugXML.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/debugXML.h new file mode 100644 index 00000000..82746873 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/debugXML.h @@ -0,0 +1,217 @@ +/* + * Summary: Tree debugging APIs + * Description: Interfaces to a set of routines used for debugging the tree + * produced by the XML parser. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __DEBUG_XML__ +#define __DEBUG_XML__ +#include <stdio.h> +#include <libxml/xmlversion.h> +#include <libxml/tree.h> + +#ifdef LIBXML_DEBUG_ENABLED + +#include <libxml/xpath.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The standard Dump routines. + */ +XMLPUBFUN void + xmlDebugDumpString (FILE *output, + const xmlChar *str); +XMLPUBFUN void + xmlDebugDumpAttr (FILE *output, + xmlAttrPtr attr, + int depth); +XMLPUBFUN void + xmlDebugDumpAttrList (FILE *output, + xmlAttrPtr attr, + int depth); +XMLPUBFUN void + xmlDebugDumpOneNode (FILE *output, + xmlNodePtr node, + int depth); +XMLPUBFUN void + xmlDebugDumpNode (FILE *output, + xmlNodePtr node, + int depth); +XMLPUBFUN void + xmlDebugDumpNodeList (FILE *output, + xmlNodePtr node, + int depth); +XMLPUBFUN void + xmlDebugDumpDocumentHead(FILE *output, + xmlDocPtr doc); +XMLPUBFUN void + xmlDebugDumpDocument (FILE *output, + xmlDocPtr doc); +XMLPUBFUN void + xmlDebugDumpDTD (FILE *output, + xmlDtdPtr dtd); +XMLPUBFUN void + xmlDebugDumpEntities (FILE *output, + xmlDocPtr doc); + +/**************************************************************** + * * + * Checking routines * + * * + ****************************************************************/ + +XMLPUBFUN int + xmlDebugCheckDocument (FILE * output, + xmlDocPtr doc); + +/**************************************************************** + * * + * XML shell helpers * + * * + ****************************************************************/ + +XMLPUBFUN void + xmlLsOneNode (FILE *output, xmlNodePtr node); +XMLPUBFUN int + xmlLsCountNode (xmlNodePtr node); + +XMLPUBFUN const char * + xmlBoolToText (int boolval); + +/**************************************************************** + * * + * The XML shell related structures and functions * + * * + ****************************************************************/ + +#ifdef LIBXML_XPATH_ENABLED +/** + * xmlShellReadlineFunc: + * @prompt: a string prompt + * + * This is a generic signature for the XML shell input function. + * + * Returns a string which will be freed by the Shell. + */ +typedef char * (* xmlShellReadlineFunc)(char *prompt); + +/** + * xmlShellCtxt: + * + * A debugging shell context. + * TODO: add the defined function tables. + */ +typedef struct _xmlShellCtxt xmlShellCtxt; +typedef xmlShellCtxt *xmlShellCtxtPtr; +struct _xmlShellCtxt { + char *filename; + xmlDocPtr doc; + xmlNodePtr node; + xmlXPathContextPtr pctxt; + int loaded; + FILE *output; + xmlShellReadlineFunc input; +}; + +/** + * xmlShellCmd: + * @ctxt: a shell context + * @arg: a string argument + * @node: a first node + * @node2: a second node + * + * This is a generic signature for the XML shell functions. + * + * Returns an int, negative returns indicating errors. + */ +typedef int (* xmlShellCmd) (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); + +XMLPUBFUN void + xmlShellPrintXPathError (int errorType, + const char *arg); +XMLPUBFUN void + xmlShellPrintXPathResult(xmlXPathObjectPtr list); +XMLPUBFUN int + xmlShellList (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +XMLPUBFUN int + xmlShellBase (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +XMLPUBFUN int + xmlShellDir (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +XMLPUBFUN int + xmlShellLoad (xmlShellCtxtPtr ctxt, + char *filename, + xmlNodePtr node, + xmlNodePtr node2); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlShellPrintNode (xmlNodePtr node); +XMLPUBFUN int + xmlShellCat (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr node, + xmlNodePtr node2); +XMLPUBFUN int + xmlShellWrite (xmlShellCtxtPtr ctxt, + char *filename, + xmlNodePtr node, + xmlNodePtr node2); +XMLPUBFUN int + xmlShellSave (xmlShellCtxtPtr ctxt, + char *filename, + xmlNodePtr node, + xmlNodePtr node2); +#endif /* LIBXML_OUTPUT_ENABLED */ +#ifdef LIBXML_VALID_ENABLED +XMLPUBFUN int + xmlShellValidate (xmlShellCtxtPtr ctxt, + char *dtd, + xmlNodePtr node, + xmlNodePtr node2); +#endif /* LIBXML_VALID_ENABLED */ +XMLPUBFUN int + xmlShellDu (xmlShellCtxtPtr ctxt, + char *arg, + xmlNodePtr tree, + xmlNodePtr node2); +XMLPUBFUN int + xmlShellPwd (xmlShellCtxtPtr ctxt, + char *buffer, + xmlNodePtr node, + xmlNodePtr node2); + +/* + * The Shell interface. + */ +XMLPUBFUN void + xmlShell (xmlDocPtr doc, + char *filename, + xmlShellReadlineFunc input, + FILE *output); + +#endif /* LIBXML_XPATH_ENABLED */ + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_DEBUG_ENABLED */ +#endif /* __DEBUG_XML__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/dict.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/dict.h new file mode 100644 index 00000000..22aa3d9d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/dict.h @@ -0,0 +1,82 @@ +/* + * Summary: string dictionary + * Description: dictionary of reusable strings, just used to avoid allocation + * and freeing operations. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_DICT_H__ +#define __XML_DICT_H__ + +#include <stddef.h> +#include <libxml/xmlversion.h> +#include <libxml/xmlstring.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The dictionary. + */ +typedef struct _xmlDict xmlDict; +typedef xmlDict *xmlDictPtr; + +/* + * Initializer + */ +XML_DEPRECATED +XMLPUBFUN int xmlInitializeDict(void); + +/* + * Constructor and destructor. + */ +XMLPUBFUN xmlDictPtr + xmlDictCreate (void); +XMLPUBFUN size_t + xmlDictSetLimit (xmlDictPtr dict, + size_t limit); +XMLPUBFUN size_t + xmlDictGetUsage (xmlDictPtr dict); +XMLPUBFUN xmlDictPtr + xmlDictCreateSub(xmlDictPtr sub); +XMLPUBFUN int + xmlDictReference(xmlDictPtr dict); +XMLPUBFUN void + xmlDictFree (xmlDictPtr dict); + +/* + * Lookup of entry in the dictionary. + */ +XMLPUBFUN const xmlChar * + xmlDictLookup (xmlDictPtr dict, + const xmlChar *name, + int len); +XMLPUBFUN const xmlChar * + xmlDictExists (xmlDictPtr dict, + const xmlChar *name, + int len); +XMLPUBFUN const xmlChar * + xmlDictQLookup (xmlDictPtr dict, + const xmlChar *prefix, + const xmlChar *name); +XMLPUBFUN int + xmlDictOwns (xmlDictPtr dict, + const xmlChar *str); +XMLPUBFUN int + xmlDictSize (xmlDictPtr dict); + +/* + * Cleanup function + */ +XML_DEPRECATED +XMLPUBFUN void + xmlDictCleanup (void); + +#ifdef __cplusplus +} +#endif +#endif /* ! __XML_DICT_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/encoding.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/encoding.h new file mode 100644 index 00000000..8594cffc --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/encoding.h @@ -0,0 +1,235 @@ +/* + * Summary: interface for the encoding conversion functions + * Description: interface for the encoding conversion functions needed for + * XML basic encoding and iconv() support. + * + * Related specs are + * rfc2044 (UTF-8 and UTF-16) F. Yergeau Alis Technologies + * [ISO-10646] UTF-8 and UTF-16 in Annexes + * [ISO-8859-1] ISO Latin-1 characters codes. + * [UNICODE] The Unicode Consortium, "The Unicode Standard -- + * Worldwide Character Encoding -- Version 1.0", Addison- + * Wesley, Volume 1, 1991, Volume 2, 1992. UTF-8 is + * described in Unicode Technical Report #4. + * [US-ASCII] Coded Character Set--7-bit American Standard Code for + * Information Interchange, ANSI X3.4-1986. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_CHAR_ENCODING_H__ +#define __XML_CHAR_ENCODING_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_ICONV_ENABLED +#include <iconv.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_ENC_ERR_SUCCESS = 0, + XML_ENC_ERR_SPACE = -1, + XML_ENC_ERR_INPUT = -2, + XML_ENC_ERR_PARTIAL = -3, + XML_ENC_ERR_INTERNAL = -4, + XML_ENC_ERR_MEMORY = -5 +} xmlCharEncError; + +/* + * xmlCharEncoding: + * + * Predefined values for some standard encodings. + * Libxml does not do beforehand translation on UTF8 and ISOLatinX. + * It also supports ASCII, ISO-8859-1, and UTF16 (LE and BE) by default. + * + * Anything else would have to be translated to UTF8 before being + * given to the parser itself. The BOM for UTF16 and the encoding + * declaration are looked at and a converter is looked for at that + * point. If not found the parser stops here as asked by the XML REC. A + * converter can be registered by the user using xmlRegisterCharEncodingHandler + * but the current form doesn't allow stateful transcoding (a serious + * problem agreed !). If iconv has been found it will be used + * automatically and allow stateful transcoding, the simplest is then + * to be sure to enable iconv and to provide iconv libs for the encoding + * support needed. + * + * Note that the generic "UTF-16" is not a predefined value. Instead, only + * the specific UTF-16LE and UTF-16BE are present. + */ +typedef enum { + XML_CHAR_ENCODING_ERROR= -1, /* No char encoding detected */ + XML_CHAR_ENCODING_NONE= 0, /* No char encoding detected */ + XML_CHAR_ENCODING_UTF8= 1, /* UTF-8 */ + XML_CHAR_ENCODING_UTF16LE= 2, /* UTF-16 little endian */ + XML_CHAR_ENCODING_UTF16BE= 3, /* UTF-16 big endian */ + XML_CHAR_ENCODING_UCS4LE= 4, /* UCS-4 little endian */ + XML_CHAR_ENCODING_UCS4BE= 5, /* UCS-4 big endian */ + XML_CHAR_ENCODING_EBCDIC= 6, /* EBCDIC uh! */ + XML_CHAR_ENCODING_UCS4_2143=7, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS4_3412=8, /* UCS-4 unusual ordering */ + XML_CHAR_ENCODING_UCS2= 9, /* UCS-2 */ + XML_CHAR_ENCODING_8859_1= 10,/* ISO-8859-1 ISO Latin 1 */ + XML_CHAR_ENCODING_8859_2= 11,/* ISO-8859-2 ISO Latin 2 */ + XML_CHAR_ENCODING_8859_3= 12,/* ISO-8859-3 */ + XML_CHAR_ENCODING_8859_4= 13,/* ISO-8859-4 */ + XML_CHAR_ENCODING_8859_5= 14,/* ISO-8859-5 */ + XML_CHAR_ENCODING_8859_6= 15,/* ISO-8859-6 */ + XML_CHAR_ENCODING_8859_7= 16,/* ISO-8859-7 */ + XML_CHAR_ENCODING_8859_8= 17,/* ISO-8859-8 */ + XML_CHAR_ENCODING_8859_9= 18,/* ISO-8859-9 */ + XML_CHAR_ENCODING_2022_JP= 19,/* ISO-2022-JP */ + XML_CHAR_ENCODING_SHIFT_JIS=20,/* Shift_JIS */ + XML_CHAR_ENCODING_EUC_JP= 21,/* EUC-JP */ + XML_CHAR_ENCODING_ASCII= 22 /* pure ASCII */ +} xmlCharEncoding; + +/** + * xmlCharEncodingInputFunc: + * @out: a pointer to an array of bytes to store the UTF-8 result + * @outlen: the length of @out + * @in: a pointer to an array of chars in the original encoding + * @inlen: the length of @in + * + * Take a block of chars in the original encoding and try to convert + * it to an UTF-8 block of chars out. + * + * Returns the number of bytes written, -1 if lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * if the return value is positive, else unpredictiable. + * The value of @outlen after return is the number of octets consumed. + */ +typedef int (* xmlCharEncodingInputFunc)(unsigned char *out, int *outlen, + const unsigned char *in, int *inlen); + + +/** + * xmlCharEncodingOutputFunc: + * @out: a pointer to an array of bytes to store the result + * @outlen: the length of @out + * @in: a pointer to an array of UTF-8 chars + * @inlen: the length of @in + * + * Take a block of UTF-8 chars in and try to convert it to another + * encoding. + * Note: a first call designed to produce heading info is called with + * in = NULL. If stateful this should also initialize the encoder state. + * + * Returns the number of bytes written, -1 if lack of space, or -2 + * if the transcoding failed. + * The value of @inlen after return is the number of octets consumed + * if the return value is positive, else unpredictiable. + * The value of @outlen after return is the number of octets produced. + */ +typedef int (* xmlCharEncodingOutputFunc)(unsigned char *out, int *outlen, + const unsigned char *in, int *inlen); + + +/* + * Block defining the handlers for non UTF-8 encodings. + * If iconv is supported, there are two extra fields. + */ +typedef struct _xmlCharEncodingHandler xmlCharEncodingHandler; +typedef xmlCharEncodingHandler *xmlCharEncodingHandlerPtr; +struct _xmlCharEncodingHandler { + char *name; + xmlCharEncodingInputFunc input; + xmlCharEncodingOutputFunc output; +#ifdef LIBXML_ICONV_ENABLED + iconv_t iconv_in; + iconv_t iconv_out; +#endif /* LIBXML_ICONV_ENABLED */ +#ifdef LIBXML_ICU_ENABLED + struct _uconv_t *uconv_in; + struct _uconv_t *uconv_out; +#endif /* LIBXML_ICU_ENABLED */ +}; + +/* + * Interfaces for encoding handlers. + */ +XML_DEPRECATED +XMLPUBFUN void + xmlInitCharEncodingHandlers (void); +XML_DEPRECATED +XMLPUBFUN void + xmlCleanupCharEncodingHandlers (void); +XMLPUBFUN void + xmlRegisterCharEncodingHandler (xmlCharEncodingHandlerPtr handler); +XMLPUBFUN xmlCharEncodingHandlerPtr + xmlGetCharEncodingHandler (xmlCharEncoding enc); +XMLPUBFUN xmlCharEncodingHandlerPtr + xmlFindCharEncodingHandler (const char *name); +XMLPUBFUN xmlCharEncodingHandlerPtr + xmlNewCharEncodingHandler (const char *name, + xmlCharEncodingInputFunc input, + xmlCharEncodingOutputFunc output); + +/* + * Interfaces for encoding names and aliases. + */ +XMLPUBFUN int + xmlAddEncodingAlias (const char *name, + const char *alias); +XMLPUBFUN int + xmlDelEncodingAlias (const char *alias); +XMLPUBFUN const char * + xmlGetEncodingAlias (const char *alias); +XMLPUBFUN void + xmlCleanupEncodingAliases (void); +XMLPUBFUN xmlCharEncoding + xmlParseCharEncoding (const char *name); +XMLPUBFUN const char * + xmlGetCharEncodingName (xmlCharEncoding enc); + +/* + * Interfaces directly used by the parsers. + */ +XMLPUBFUN xmlCharEncoding + xmlDetectCharEncoding (const unsigned char *in, + int len); + +struct _xmlBuffer; +XMLPUBFUN int + xmlCharEncOutFunc (xmlCharEncodingHandler *handler, + struct _xmlBuffer *out, + struct _xmlBuffer *in); + +XMLPUBFUN int + xmlCharEncInFunc (xmlCharEncodingHandler *handler, + struct _xmlBuffer *out, + struct _xmlBuffer *in); +XML_DEPRECATED +XMLPUBFUN int + xmlCharEncFirstLine (xmlCharEncodingHandler *handler, + struct _xmlBuffer *out, + struct _xmlBuffer *in); +XMLPUBFUN int + xmlCharEncCloseFunc (xmlCharEncodingHandler *handler); + +/* + * Export a few useful functions + */ +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN int + UTF8Toisolat1 (unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen); +#endif /* LIBXML_OUTPUT_ENABLED */ +XMLPUBFUN int + isolat1ToUTF8 (unsigned char *out, + int *outlen, + const unsigned char *in, + int *inlen); +#ifdef __cplusplus +} +#endif + +#endif /* __XML_CHAR_ENCODING_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/entities.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/entities.h new file mode 100644 index 00000000..f6793753 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/entities.h @@ -0,0 +1,155 @@ +/* + * Summary: interface for the XML entities handling + * Description: this module provides some of the entity API needed + * for the parser and applications. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_ENTITIES_H__ +#define __XML_ENTITIES_H__ + +#include <libxml/xmlversion.h> +#define XML_TREE_INTERNALS +#include <libxml/tree.h> +#undef XML_TREE_INTERNALS + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The different valid entity types. + */ +typedef enum { + XML_INTERNAL_GENERAL_ENTITY = 1, + XML_EXTERNAL_GENERAL_PARSED_ENTITY = 2, + XML_EXTERNAL_GENERAL_UNPARSED_ENTITY = 3, + XML_INTERNAL_PARAMETER_ENTITY = 4, + XML_EXTERNAL_PARAMETER_ENTITY = 5, + XML_INTERNAL_PREDEFINED_ENTITY = 6 +} xmlEntityType; + +/* + * An unit of storage for an entity, contains the string, the value + * and the linkind data needed for the linking in the hash table. + */ + +struct _xmlEntity { + void *_private; /* application data */ + xmlElementType type; /* XML_ENTITY_DECL, must be second ! */ + const xmlChar *name; /* Entity name */ + struct _xmlNode *children; /* First child link */ + struct _xmlNode *last; /* Last child link */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + xmlChar *orig; /* content without ref substitution */ + xmlChar *content; /* content or ndata if unparsed */ + int length; /* the content length */ + xmlEntityType etype; /* The entity type */ + const xmlChar *ExternalID; /* External identifier for PUBLIC */ + const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC Entity */ + + struct _xmlEntity *nexte; /* unused */ + const xmlChar *URI; /* the full URI as computed */ + int owner; /* does the entity own the childrens */ + int flags; /* various flags */ + unsigned long expandedSize; /* expanded size */ +}; + +/* + * All entities are stored in an hash table. + * There is 2 separate hash tables for global and parameter entities. + */ + +typedef struct _xmlHashTable xmlEntitiesTable; +typedef xmlEntitiesTable *xmlEntitiesTablePtr; + +/* + * External functions: + */ + +#ifdef LIBXML_LEGACY_ENABLED +XML_DEPRECATED +XMLPUBFUN void + xmlInitializePredefinedEntities (void); +#endif /* LIBXML_LEGACY_ENABLED */ + +XMLPUBFUN xmlEntityPtr + xmlNewEntity (xmlDocPtr doc, + const xmlChar *name, + int type, + const xmlChar *ExternalID, + const xmlChar *SystemID, + const xmlChar *content); +XMLPUBFUN void + xmlFreeEntity (xmlEntityPtr entity); +XMLPUBFUN xmlEntityPtr + xmlAddDocEntity (xmlDocPtr doc, + const xmlChar *name, + int type, + const xmlChar *ExternalID, + const xmlChar *SystemID, + const xmlChar *content); +XMLPUBFUN xmlEntityPtr + xmlAddDtdEntity (xmlDocPtr doc, + const xmlChar *name, + int type, + const xmlChar *ExternalID, + const xmlChar *SystemID, + const xmlChar *content); +XMLPUBFUN xmlEntityPtr + xmlGetPredefinedEntity (const xmlChar *name); +XMLPUBFUN xmlEntityPtr + xmlGetDocEntity (const xmlDoc *doc, + const xmlChar *name); +XMLPUBFUN xmlEntityPtr + xmlGetDtdEntity (xmlDocPtr doc, + const xmlChar *name); +XMLPUBFUN xmlEntityPtr + xmlGetParameterEntity (xmlDocPtr doc, + const xmlChar *name); +#ifdef LIBXML_LEGACY_ENABLED +XML_DEPRECATED +XMLPUBFUN const xmlChar * + xmlEncodeEntities (xmlDocPtr doc, + const xmlChar *input); +#endif /* LIBXML_LEGACY_ENABLED */ +XMLPUBFUN xmlChar * + xmlEncodeEntitiesReentrant(xmlDocPtr doc, + const xmlChar *input); +XMLPUBFUN xmlChar * + xmlEncodeSpecialChars (const xmlDoc *doc, + const xmlChar *input); +XMLPUBFUN xmlEntitiesTablePtr + xmlCreateEntitiesTable (void); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlEntitiesTablePtr + xmlCopyEntitiesTable (xmlEntitiesTablePtr table); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN void + xmlFreeEntitiesTable (xmlEntitiesTablePtr table); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlDumpEntitiesTable (xmlBufferPtr buf, + xmlEntitiesTablePtr table); +XMLPUBFUN void + xmlDumpEntityDecl (xmlBufferPtr buf, + xmlEntityPtr ent); +#endif /* LIBXML_OUTPUT_ENABLED */ +#ifdef LIBXML_LEGACY_ENABLED +XMLPUBFUN void + xmlCleanupPredefinedEntities(void); +#endif /* LIBXML_LEGACY_ENABLED */ + + +#ifdef __cplusplus +} +#endif + +# endif /* __XML_ENTITIES_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/globals.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/globals.h new file mode 100644 index 00000000..92f41312 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/globals.h @@ -0,0 +1,41 @@ +/* + * Summary: interface for all global variables of the library + * Description: Deprecated, don't use + * + * Copy: See Copyright for the status of this software. + */ + +#ifndef __XML_GLOBALS_H +#define __XML_GLOBALS_H + +#include <libxml/xmlversion.h> + +/* + * This file was required to access global variables until version v2.12.0. + * + * These includes are for backward compatibility. + */ +#include <libxml/HTMLparser.h> +#include <libxml/parser.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlIO.h> +#include <libxml/xmlsave.h> +#include <libxml/threads.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _xmlGlobalState xmlGlobalState; +typedef xmlGlobalState *xmlGlobalStatePtr; + +XML_DEPRECATED XMLPUBFUN void +xmlInitializeGlobalState(xmlGlobalStatePtr gs); +XML_DEPRECATED XMLPUBFUN +xmlGlobalStatePtr xmlGetGlobalState(void); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_GLOBALS_H */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/hash.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/hash.h new file mode 100644 index 00000000..f4af09ee --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/hash.h @@ -0,0 +1,232 @@ +/* + * Summary: Chained hash tables + * Description: This module implements the hash table support used in + * various places in the library. + * + * Copy: See Copyright for the status of this software. + * + * Author: Bjorn Reese <bjorn.reese@systematic.dk> + */ + +#ifndef __XML_HASH_H__ +#define __XML_HASH_H__ + +#include <libxml/xmlversion.h> +#include <libxml/dict.h> +#include <libxml/xmlstring.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The hash table. + */ +typedef struct _xmlHashTable xmlHashTable; +typedef xmlHashTable *xmlHashTablePtr; + +/* + * Recent version of gcc produce a warning when a function pointer is assigned + * to an object pointer, or vice versa. The following macro is a dirty hack + * to allow suppression of the warning. If your architecture has function + * pointers which are a different size than a void pointer, there may be some + * serious trouble within the library. + */ +/** + * XML_CAST_FPTR: + * @fptr: pointer to a function + * + * Macro to do a casting from an object pointer to a + * function pointer without encountering a warning from + * gcc + * + * #define XML_CAST_FPTR(fptr) (*(void **)(&fptr)) + * This macro violated ISO C aliasing rules (gcc4 on s390 broke) + * so it is disabled now + */ + +#define XML_CAST_FPTR(fptr) fptr + +/* + * function types: + */ +/** + * xmlHashDeallocator: + * @payload: the data in the hash + * @name: the name associated + * + * Callback to free data from a hash. + */ +typedef void (*xmlHashDeallocator)(void *payload, const xmlChar *name); +/** + * xmlHashCopier: + * @payload: the data in the hash + * @name: the name associated + * + * Callback to copy data from a hash. + * + * Returns a copy of the data or NULL in case of error. + */ +typedef void *(*xmlHashCopier)(void *payload, const xmlChar *name); +/** + * xmlHashScanner: + * @payload: the data in the hash + * @data: extra scanner data + * @name: the name associated + * + * Callback when scanning data in a hash with the simple scanner. + */ +typedef void (*xmlHashScanner)(void *payload, void *data, const xmlChar *name); +/** + * xmlHashScannerFull: + * @payload: the data in the hash + * @data: extra scanner data + * @name: the name associated + * @name2: the second name associated + * @name3: the third name associated + * + * Callback when scanning data in a hash with the full scanner. + */ +typedef void (*xmlHashScannerFull)(void *payload, void *data, + const xmlChar *name, const xmlChar *name2, + const xmlChar *name3); + +/* + * Constructor and destructor. + */ +XMLPUBFUN xmlHashTablePtr + xmlHashCreate (int size); +XMLPUBFUN xmlHashTablePtr + xmlHashCreateDict (int size, + xmlDictPtr dict); +XMLPUBFUN void + xmlHashFree (xmlHashTablePtr hash, + xmlHashDeallocator dealloc); +XMLPUBFUN void + xmlHashDefaultDeallocator(void *entry, + const xmlChar *name); + +/* + * Add a new entry to the hash table. + */ +XMLPUBFUN int + xmlHashAddEntry (xmlHashTablePtr hash, + const xmlChar *name, + void *userdata); +XMLPUBFUN int + xmlHashUpdateEntry (xmlHashTablePtr hash, + const xmlChar *name, + void *userdata, + xmlHashDeallocator dealloc); +XMLPUBFUN int + xmlHashAddEntry2 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + void *userdata); +XMLPUBFUN int + xmlHashUpdateEntry2 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + void *userdata, + xmlHashDeallocator dealloc); +XMLPUBFUN int + xmlHashAddEntry3 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata); +XMLPUBFUN int + xmlHashUpdateEntry3 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + void *userdata, + xmlHashDeallocator dealloc); + +/* + * Remove an entry from the hash table. + */ +XMLPUBFUN int + xmlHashRemoveEntry (xmlHashTablePtr hash, + const xmlChar *name, + xmlHashDeallocator dealloc); +XMLPUBFUN int + xmlHashRemoveEntry2 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + xmlHashDeallocator dealloc); +XMLPUBFUN int + xmlHashRemoveEntry3 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + xmlHashDeallocator dealloc); + +/* + * Retrieve the payload. + */ +XMLPUBFUN void * + xmlHashLookup (xmlHashTablePtr hash, + const xmlChar *name); +XMLPUBFUN void * + xmlHashLookup2 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2); +XMLPUBFUN void * + xmlHashLookup3 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3); +XMLPUBFUN void * + xmlHashQLookup (xmlHashTablePtr hash, + const xmlChar *prefix, + const xmlChar *name); +XMLPUBFUN void * + xmlHashQLookup2 (xmlHashTablePtr hash, + const xmlChar *prefix, + const xmlChar *name, + const xmlChar *prefix2, + const xmlChar *name2); +XMLPUBFUN void * + xmlHashQLookup3 (xmlHashTablePtr hash, + const xmlChar *prefix, + const xmlChar *name, + const xmlChar *prefix2, + const xmlChar *name2, + const xmlChar *prefix3, + const xmlChar *name3); + +/* + * Helpers. + */ +XMLPUBFUN xmlHashTablePtr + xmlHashCopy (xmlHashTablePtr hash, + xmlHashCopier copy); +XMLPUBFUN int + xmlHashSize (xmlHashTablePtr hash); +XMLPUBFUN void + xmlHashScan (xmlHashTablePtr hash, + xmlHashScanner scan, + void *data); +XMLPUBFUN void + xmlHashScan3 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + xmlHashScanner scan, + void *data); +XMLPUBFUN void + xmlHashScanFull (xmlHashTablePtr hash, + xmlHashScannerFull scan, + void *data); +XMLPUBFUN void + xmlHashScanFull3 (xmlHashTablePtr hash, + const xmlChar *name, + const xmlChar *name2, + const xmlChar *name3, + xmlHashScannerFull scan, + void *data); +#ifdef __cplusplus +} +#endif +#endif /* ! __XML_HASH_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/list.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/list.h new file mode 100644 index 00000000..5eab8f59 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/list.h @@ -0,0 +1,137 @@ +/* + * Summary: lists interfaces + * Description: this module implement the list support used in + * various place in the library. + * + * Copy: See Copyright for the status of this software. + * + * Author: Gary Pennington <Gary.Pennington@uk.sun.com> + */ + +#ifndef __XML_LINK_INCLUDE__ +#define __XML_LINK_INCLUDE__ + +#include <libxml/xmlversion.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _xmlLink xmlLink; +typedef xmlLink *xmlLinkPtr; + +typedef struct _xmlList xmlList; +typedef xmlList *xmlListPtr; + +/** + * xmlListDeallocator: + * @lk: the data to deallocate + * + * Callback function used to free data from a list. + */ +typedef void (*xmlListDeallocator) (xmlLinkPtr lk); +/** + * xmlListDataCompare: + * @data0: the first data + * @data1: the second data + * + * Callback function used to compare 2 data. + * + * Returns 0 is equality, -1 or 1 otherwise depending on the ordering. + */ +typedef int (*xmlListDataCompare) (const void *data0, const void *data1); +/** + * xmlListWalker: + * @data: the data found in the list + * @user: extra user provided data to the walker + * + * Callback function used when walking a list with xmlListWalk(). + * + * Returns 0 to stop walking the list, 1 otherwise. + */ +typedef int (*xmlListWalker) (const void *data, void *user); + +/* Creation/Deletion */ +XMLPUBFUN xmlListPtr + xmlListCreate (xmlListDeallocator deallocator, + xmlListDataCompare compare); +XMLPUBFUN void + xmlListDelete (xmlListPtr l); + +/* Basic Operators */ +XMLPUBFUN void * + xmlListSearch (xmlListPtr l, + void *data); +XMLPUBFUN void * + xmlListReverseSearch (xmlListPtr l, + void *data); +XMLPUBFUN int + xmlListInsert (xmlListPtr l, + void *data) ; +XMLPUBFUN int + xmlListAppend (xmlListPtr l, + void *data) ; +XMLPUBFUN int + xmlListRemoveFirst (xmlListPtr l, + void *data); +XMLPUBFUN int + xmlListRemoveLast (xmlListPtr l, + void *data); +XMLPUBFUN int + xmlListRemoveAll (xmlListPtr l, + void *data); +XMLPUBFUN void + xmlListClear (xmlListPtr l); +XMLPUBFUN int + xmlListEmpty (xmlListPtr l); +XMLPUBFUN xmlLinkPtr + xmlListFront (xmlListPtr l); +XMLPUBFUN xmlLinkPtr + xmlListEnd (xmlListPtr l); +XMLPUBFUN int + xmlListSize (xmlListPtr l); + +XMLPUBFUN void + xmlListPopFront (xmlListPtr l); +XMLPUBFUN void + xmlListPopBack (xmlListPtr l); +XMLPUBFUN int + xmlListPushFront (xmlListPtr l, + void *data); +XMLPUBFUN int + xmlListPushBack (xmlListPtr l, + void *data); + +/* Advanced Operators */ +XMLPUBFUN void + xmlListReverse (xmlListPtr l); +XMLPUBFUN void + xmlListSort (xmlListPtr l); +XMLPUBFUN void + xmlListWalk (xmlListPtr l, + xmlListWalker walker, + void *user); +XMLPUBFUN void + xmlListReverseWalk (xmlListPtr l, + xmlListWalker walker, + void *user); +XMLPUBFUN void + xmlListMerge (xmlListPtr l1, + xmlListPtr l2); +XMLPUBFUN xmlListPtr + xmlListDup (const xmlListPtr old); +XMLPUBFUN int + xmlListCopy (xmlListPtr cur, + const xmlListPtr old); +/* Link operators */ +XMLPUBFUN void * + xmlLinkGetData (xmlLinkPtr lk); + +/* xmlListUnique() */ +/* xmlListSwap */ + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_LINK_INCLUDE__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/nanoftp.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/nanoftp.h new file mode 100644 index 00000000..ed3ac4f1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/nanoftp.h @@ -0,0 +1,186 @@ +/* + * Summary: minimal FTP implementation + * Description: minimal FTP implementation allowing to fetch resources + * like external subset. This module is DEPRECATED, do not + * use any of its functions. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __NANO_FTP_H__ +#define __NANO_FTP_H__ + +#include <libxml/xmlversion.h> + +#if defined(LIBXML_FTP_ENABLED) + +/* Needed for portability to Windows 64 bits */ +#if defined(_WIN32) +#include <winsock2.h> +#else +/** + * SOCKET: + * + * macro used to provide portability of code to windows sockets + */ +#define SOCKET int +/** + * INVALID_SOCKET: + * + * macro used to provide portability of code to windows sockets + * the value to be used when the socket is not valid + */ +#undef INVALID_SOCKET +#define INVALID_SOCKET (-1) +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ftpListCallback: + * @userData: user provided data for the callback + * @filename: the file name (including "->" when links are shown) + * @attrib: the attribute string + * @owner: the owner string + * @group: the group string + * @size: the file size + * @links: the link count + * @year: the year + * @month: the month + * @day: the day + * @hour: the hour + * @minute: the minute + * + * A callback for the xmlNanoFTPList command. + * Note that only one of year and day:minute are specified. + */ +typedef void (*ftpListCallback) (void *userData, + const char *filename, const char *attrib, + const char *owner, const char *group, + unsigned long size, int links, int year, + const char *month, int day, int hour, + int minute); +/** + * ftpDataCallback: + * @userData: the user provided context + * @data: the data received + * @len: its size in bytes + * + * A callback for the xmlNanoFTPGet command. + */ +typedef void (*ftpDataCallback) (void *userData, + const char *data, + int len); + +/* + * Init + */ +XML_DEPRECATED +XMLPUBFUN void + xmlNanoFTPInit (void); +XML_DEPRECATED +XMLPUBFUN void + xmlNanoFTPCleanup (void); + +/* + * Creating/freeing contexts. + */ +XML_DEPRECATED +XMLPUBFUN void * + xmlNanoFTPNewCtxt (const char *URL); +XML_DEPRECATED +XMLPUBFUN void + xmlNanoFTPFreeCtxt (void * ctx); +XML_DEPRECATED +XMLPUBFUN void * + xmlNanoFTPConnectTo (const char *server, + int port); +/* + * Opening/closing session connections. + */ +XML_DEPRECATED +XMLPUBFUN void * + xmlNanoFTPOpen (const char *URL); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPConnect (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPClose (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPQuit (void *ctx); +XML_DEPRECATED +XMLPUBFUN void + xmlNanoFTPScanProxy (const char *URL); +XML_DEPRECATED +XMLPUBFUN void + xmlNanoFTPProxy (const char *host, + int port, + const char *user, + const char *passwd, + int type); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPUpdateURL (void *ctx, + const char *URL); + +/* + * Rather internal commands. + */ +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPGetResponse (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPCheckResponse (void *ctx); + +/* + * CD/DIR/GET handlers. + */ +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPCwd (void *ctx, + const char *directory); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPDele (void *ctx, + const char *file); + +XML_DEPRECATED +XMLPUBFUN SOCKET + xmlNanoFTPGetConnection (void *ctx); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPCloseConnection(void *ctx); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPList (void *ctx, + ftpListCallback callback, + void *userData, + const char *filename); +XML_DEPRECATED +XMLPUBFUN SOCKET + xmlNanoFTPGetSocket (void *ctx, + const char *filename); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPGet (void *ctx, + ftpDataCallback callback, + void *userData, + const char *filename); +XML_DEPRECATED +XMLPUBFUN int + xmlNanoFTPRead (void *ctx, + void *dest, + int len); + +#ifdef __cplusplus +} +#endif +#endif /* defined(LIBXML_FTP_ENABLED) || defined(LIBXML_LEGACY_ENABLED) */ +#endif /* __NANO_FTP_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/nanohttp.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/nanohttp.h new file mode 100644 index 00000000..3b5e037f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/nanohttp.h @@ -0,0 +1,81 @@ +/* + * Summary: minimal HTTP implementation + * Description: minimal HTTP implementation allowing to fetch resources + * like external subset. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __NANO_HTTP_H__ +#define __NANO_HTTP_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_HTTP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif +XMLPUBFUN void + xmlNanoHTTPInit (void); +XMLPUBFUN void + xmlNanoHTTPCleanup (void); +XMLPUBFUN void + xmlNanoHTTPScanProxy (const char *URL); +XMLPUBFUN int + xmlNanoHTTPFetch (const char *URL, + const char *filename, + char **contentType); +XMLPUBFUN void * + xmlNanoHTTPMethod (const char *URL, + const char *method, + const char *input, + char **contentType, + const char *headers, + int ilen); +XMLPUBFUN void * + xmlNanoHTTPMethodRedir (const char *URL, + const char *method, + const char *input, + char **contentType, + char **redir, + const char *headers, + int ilen); +XMLPUBFUN void * + xmlNanoHTTPOpen (const char *URL, + char **contentType); +XMLPUBFUN void * + xmlNanoHTTPOpenRedir (const char *URL, + char **contentType, + char **redir); +XMLPUBFUN int + xmlNanoHTTPReturnCode (void *ctx); +XMLPUBFUN const char * + xmlNanoHTTPAuthHeader (void *ctx); +XMLPUBFUN const char * + xmlNanoHTTPRedir (void *ctx); +XMLPUBFUN int + xmlNanoHTTPContentLength( void * ctx ); +XMLPUBFUN const char * + xmlNanoHTTPEncoding (void *ctx); +XMLPUBFUN const char * + xmlNanoHTTPMimeType (void *ctx); +XMLPUBFUN int + xmlNanoHTTPRead (void *ctx, + void *dest, + int len); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN int + xmlNanoHTTPSave (void *ctxt, + const char *filename); +#endif /* LIBXML_OUTPUT_ENABLED */ +XMLPUBFUN void + xmlNanoHTTPClose (void *ctx); +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_HTTP_ENABLED */ +#endif /* __NANO_HTTP_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/parser.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/parser.h new file mode 100644 index 00000000..fd6d5207 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/parser.h @@ -0,0 +1,1384 @@ +/* + * Summary: the core parser module + * Description: Interfaces, constants and types related to the XML parser + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_PARSER_H__ +#define __XML_PARSER_H__ + +#include <libxml/xmlversion.h> +#define XML_TREE_INTERNALS +#include <libxml/tree.h> +#undef XML_TREE_INTERNALS +#include <libxml/dict.h> +#include <libxml/hash.h> +#include <libxml/valid.h> +#include <libxml/entities.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlstring.h> +#include <libxml/xmlmemory.h> +#include <libxml/encoding.h> +#include <libxml/xmlIO.h> +/* for compatibility */ +#include <libxml/SAX2.h> +#include <libxml/threads.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XML_DEFAULT_VERSION: + * + * The default version of XML used: 1.0 + */ +#define XML_DEFAULT_VERSION "1.0" + +/** + * xmlParserInput: + * + * An xmlParserInput is an input flow for the XML processor. + * Each entity parsed is associated an xmlParserInput (except the + * few predefined ones). This is the case both for internal entities + * - in which case the flow is already completely in memory - or + * external entities - in which case we use the buf structure for + * progressive reading and I18N conversions to the internal UTF-8 format. + */ + +/** + * xmlParserInputDeallocate: + * @str: the string to deallocate + * + * Callback for freeing some parser input allocations. + */ +typedef void (* xmlParserInputDeallocate)(xmlChar *str); + +struct _xmlParserInput { + /* Input buffer */ + xmlParserInputBufferPtr buf; /* UTF-8 encoded buffer */ + + const char *filename; /* The file analyzed, if any */ + const char *directory; /* the directory/base of the file */ + const xmlChar *base; /* Base of the array to parse */ + const xmlChar *cur; /* Current char being parsed */ + const xmlChar *end; /* end of the array to parse */ + int length; /* length if known */ + int line; /* Current line */ + int col; /* Current column */ + unsigned long consumed; /* How many xmlChars already consumed */ + xmlParserInputDeallocate free; /* function to deallocate the base */ + const xmlChar *encoding; /* unused */ + const xmlChar *version; /* the version string for entity */ + int flags; /* Flags */ + int id; /* an unique identifier for the entity */ + unsigned long parentConsumed; /* consumed bytes from parents */ + xmlEntityPtr entity; /* entity, if any */ +}; + +/** + * xmlParserNodeInfo: + * + * The parser can be asked to collect Node information, i.e. at what + * place in the file they were detected. + * NOTE: This is off by default and not very well tested. + */ +typedef struct _xmlParserNodeInfo xmlParserNodeInfo; +typedef xmlParserNodeInfo *xmlParserNodeInfoPtr; + +struct _xmlParserNodeInfo { + const struct _xmlNode* node; + /* Position & line # that text that created the node begins & ends on */ + unsigned long begin_pos; + unsigned long begin_line; + unsigned long end_pos; + unsigned long end_line; +}; + +typedef struct _xmlParserNodeInfoSeq xmlParserNodeInfoSeq; +typedef xmlParserNodeInfoSeq *xmlParserNodeInfoSeqPtr; +struct _xmlParserNodeInfoSeq { + unsigned long maximum; + unsigned long length; + xmlParserNodeInfo* buffer; +}; + +/** + * xmlParserInputState: + * + * The parser is now working also as a state based parser. + * The recursive one use the state info for entities processing. + */ +typedef enum { + XML_PARSER_EOF = -1, /* nothing is to be parsed */ + XML_PARSER_START = 0, /* nothing has been parsed */ + XML_PARSER_MISC, /* Misc* before int subset */ + XML_PARSER_PI, /* Within a processing instruction */ + XML_PARSER_DTD, /* within some DTD content */ + XML_PARSER_PROLOG, /* Misc* after internal subset */ + XML_PARSER_COMMENT, /* within a comment */ + XML_PARSER_START_TAG, /* within a start tag */ + XML_PARSER_CONTENT, /* within the content */ + XML_PARSER_CDATA_SECTION, /* within a CDATA section */ + XML_PARSER_END_TAG, /* within a closing tag */ + XML_PARSER_ENTITY_DECL, /* within an entity declaration */ + XML_PARSER_ENTITY_VALUE, /* within an entity value in a decl */ + XML_PARSER_ATTRIBUTE_VALUE, /* within an attribute value */ + XML_PARSER_SYSTEM_LITERAL, /* within a SYSTEM value */ + XML_PARSER_EPILOG, /* the Misc* after the last end tag */ + XML_PARSER_IGNORE, /* within an IGNORED section */ + XML_PARSER_PUBLIC_LITERAL, /* within a PUBLIC value */ + XML_PARSER_XML_DECL /* before XML decl (but after BOM) */ +} xmlParserInputState; + +/** + * XML_DETECT_IDS: + * + * Bit in the loadsubset context field to tell to do ID/REFs lookups. + * Use it to initialize xmlLoadExtDtdDefaultValue. + */ +#define XML_DETECT_IDS 2 + +/** + * XML_COMPLETE_ATTRS: + * + * Bit in the loadsubset context field to tell to do complete the + * elements attributes lists with the ones defaulted from the DTDs. + * Use it to initialize xmlLoadExtDtdDefaultValue. + */ +#define XML_COMPLETE_ATTRS 4 + +/** + * XML_SKIP_IDS: + * + * Bit in the loadsubset context field to tell to not do ID/REFs registration. + * Used to initialize xmlLoadExtDtdDefaultValue in some special cases. + */ +#define XML_SKIP_IDS 8 + +/** + * xmlParserMode: + * + * A parser can operate in various modes + */ +typedef enum { + XML_PARSE_UNKNOWN = 0, + XML_PARSE_DOM = 1, + XML_PARSE_SAX = 2, + XML_PARSE_PUSH_DOM = 3, + XML_PARSE_PUSH_SAX = 4, + XML_PARSE_READER = 5 +} xmlParserMode; + +typedef struct _xmlStartTag xmlStartTag; +typedef struct _xmlParserNsData xmlParserNsData; +typedef struct _xmlAttrHashBucket xmlAttrHashBucket; + +/** + * xmlParserCtxt: + * + * The parser context. + * NOTE This doesn't completely define the parser state, the (current ?) + * design of the parser uses recursive function calls since this allow + * and easy mapping from the production rules of the specification + * to the actual code. The drawback is that the actual function call + * also reflect the parser state. However most of the parsing routines + * takes as the only argument the parser context pointer, so migrating + * to a state based parser for progressive parsing shouldn't be too hard. + */ +struct _xmlParserCtxt { + struct _xmlSAXHandler *sax; /* The SAX handler */ + void *userData; /* For SAX interface only, used by DOM build */ + xmlDocPtr myDoc; /* the document being built */ + int wellFormed; /* is the document well formed */ + int replaceEntities; /* shall we replace entities ? */ + const xmlChar *version; /* the XML version string */ + const xmlChar *encoding; /* the declared encoding, if any */ + int standalone; /* standalone document */ + int html; /* an HTML(1) document + * 3 is HTML after <head> + * 10 is HTML after <body> + */ + + /* Input stream stack */ + xmlParserInputPtr input; /* Current input stream */ + int inputNr; /* Number of current input streams */ + int inputMax; /* Max number of input streams */ + xmlParserInputPtr *inputTab; /* stack of inputs */ + + /* Node analysis stack only used for DOM building */ + xmlNodePtr node; /* Current parsed Node */ + int nodeNr; /* Depth of the parsing stack */ + int nodeMax; /* Max depth of the parsing stack */ + xmlNodePtr *nodeTab; /* array of nodes */ + + int record_info; /* Whether node info should be kept */ + xmlParserNodeInfoSeq node_seq; /* info about each node parsed */ + + int errNo; /* error code */ + + int hasExternalSubset; /* reference and external subset */ + int hasPErefs; /* the internal subset has PE refs */ + int external; /* are we parsing an external entity */ + + int valid; /* is the document valid */ + int validate; /* shall we try to validate ? */ + xmlValidCtxt vctxt; /* The validity context */ + + xmlParserInputState instate; /* current type of input */ + int token; /* next char look-ahead */ + + char *directory; /* the data directory */ + + /* Node name stack */ + const xmlChar *name; /* Current parsed Node */ + int nameNr; /* Depth of the parsing stack */ + int nameMax; /* Max depth of the parsing stack */ + const xmlChar * *nameTab; /* array of nodes */ + + long nbChars; /* unused */ + long checkIndex; /* used by progressive parsing lookup */ + int keepBlanks; /* ugly but ... */ + int disableSAX; /* SAX callbacks are disabled */ + int inSubset; /* Parsing is in int 1/ext 2 subset */ + const xmlChar * intSubName; /* name of subset */ + xmlChar * extSubURI; /* URI of external subset */ + xmlChar * extSubSystem; /* SYSTEM ID of external subset */ + + /* xml:space values */ + int * space; /* Should the parser preserve spaces */ + int spaceNr; /* Depth of the parsing stack */ + int spaceMax; /* Max depth of the parsing stack */ + int * spaceTab; /* array of space infos */ + + int depth; /* to prevent entity substitution loops */ + xmlParserInputPtr entity; /* used to check entities boundaries */ + int charset; /* unused */ + int nodelen; /* Those two fields are there to */ + int nodemem; /* Speed up large node parsing */ + int pedantic; /* signal pedantic warnings */ + void *_private; /* For user data, libxml won't touch it */ + + int loadsubset; /* should the external subset be loaded */ + int linenumbers; /* set line number in element content */ + void *catalogs; /* document's own catalog */ + int recovery; /* run in recovery mode */ + int progressive; /* is this a progressive parsing */ + xmlDictPtr dict; /* dictionary for the parser */ + const xmlChar * *atts; /* array for the attributes callbacks */ + int maxatts; /* the size of the array */ + int docdict; /* use strings from dict to build tree */ + + /* + * pre-interned strings + */ + const xmlChar *str_xml; + const xmlChar *str_xmlns; + const xmlChar *str_xml_ns; + + /* + * Everything below is used only by the new SAX mode + */ + int sax2; /* operating in the new SAX mode */ + int nsNr; /* the number of inherited namespaces */ + int nsMax; /* the size of the arrays */ + const xmlChar * *nsTab; /* the array of prefix/namespace name */ + unsigned *attallocs; /* which attribute were allocated */ + xmlStartTag *pushTab; /* array of data for push */ + xmlHashTablePtr attsDefault; /* defaulted attributes if any */ + xmlHashTablePtr attsSpecial; /* non-CDATA attributes if any */ + int nsWellFormed; /* is the document XML Namespace okay */ + int options; /* Extra options */ + + /* + * Those fields are needed only for streaming parsing so far + */ + int dictNames; /* Use dictionary names for the tree */ + int freeElemsNr; /* number of freed element nodes */ + xmlNodePtr freeElems; /* List of freed element nodes */ + int freeAttrsNr; /* number of freed attributes nodes */ + xmlAttrPtr freeAttrs; /* List of freed attributes nodes */ + + /* + * the complete error information for the last error. + */ + xmlError lastError; + xmlParserMode parseMode; /* the parser mode */ + unsigned long nbentities; /* unused */ + unsigned long sizeentities; /* size of parsed entities */ + + /* for use by HTML non-recursive parser */ + xmlParserNodeInfo *nodeInfo; /* Current NodeInfo */ + int nodeInfoNr; /* Depth of the parsing stack */ + int nodeInfoMax; /* Max depth of the parsing stack */ + xmlParserNodeInfo *nodeInfoTab; /* array of nodeInfos */ + + int input_id; /* we need to label inputs */ + unsigned long sizeentcopy; /* volume of entity copy */ + + int endCheckState; /* quote state for push parser */ + unsigned short nbErrors; /* number of errors */ + unsigned short nbWarnings; /* number of warnings */ + unsigned maxAmpl; /* maximum amplification factor */ + + xmlParserNsData *nsdb; /* namespace database */ + unsigned attrHashMax; /* allocated size */ + xmlAttrHashBucket *attrHash; /* atttribute hash table */ +}; + +/** + * xmlSAXLocator: + * + * A SAX Locator. + */ +struct _xmlSAXLocator { + const xmlChar *(*getPublicId)(void *ctx); + const xmlChar *(*getSystemId)(void *ctx); + int (*getLineNumber)(void *ctx); + int (*getColumnNumber)(void *ctx); +}; + +/** + * xmlSAXHandler: + * + * A SAX handler is bunch of callbacks called by the parser when processing + * of the input generate data or structure information. + */ + +/** + * resolveEntitySAXFunc: + * @ctx: the user data (XML parser context) + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * Callback: + * The entity loader, to control the loading of external entities, + * the application can either: + * - override this resolveEntity() callback in the SAX block + * - or better use the xmlSetExternalEntityLoader() function to + * set up it's own entity resolution routine + * + * Returns the xmlParserInputPtr if inlined or NULL for DOM behaviour. + */ +typedef xmlParserInputPtr (*resolveEntitySAXFunc) (void *ctx, + const xmlChar *publicId, + const xmlChar *systemId); +/** + * internalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on internal subset declaration. + */ +typedef void (*internalSubsetSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +/** + * externalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the root element name + * @ExternalID: the external ID + * @SystemID: the SYSTEM ID (e.g. filename or URL) + * + * Callback on external subset declaration. + */ +typedef void (*externalSubsetSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +/** + * getEntitySAXFunc: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get an entity by name. + * + * Returns the xmlEntityPtr if found. + */ +typedef xmlEntityPtr (*getEntitySAXFunc) (void *ctx, + const xmlChar *name); +/** + * getParameterEntitySAXFunc: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Get a parameter entity by name. + * + * Returns the xmlEntityPtr if found. + */ +typedef xmlEntityPtr (*getParameterEntitySAXFunc) (void *ctx, + const xmlChar *name); +/** + * entityDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the entity name + * @type: the entity type + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @content: the entity value (without processing). + * + * An entity definition has been parsed. + */ +typedef void (*entityDeclSAXFunc) (void *ctx, + const xmlChar *name, + int type, + const xmlChar *publicId, + const xmlChar *systemId, + xmlChar *content); +/** + * notationDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The name of the notation + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * + * What to do when a notation declaration has been parsed. + */ +typedef void (*notationDeclSAXFunc)(void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId); +/** + * attributeDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @elem: the name of the element + * @fullname: the attribute name + * @type: the attribute type + * @def: the type of default value + * @defaultValue: the attribute default value + * @tree: the tree of enumerated value set + * + * An attribute definition has been parsed. + */ +typedef void (*attributeDeclSAXFunc)(void *ctx, + const xmlChar *elem, + const xmlChar *fullname, + int type, + int def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +/** + * elementDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: the element name + * @type: the element type + * @content: the element value tree + * + * An element definition has been parsed. + */ +typedef void (*elementDeclSAXFunc)(void *ctx, + const xmlChar *name, + int type, + xmlElementContentPtr content); +/** + * unparsedEntityDeclSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The name of the entity + * @publicId: The public ID of the entity + * @systemId: The system ID of the entity + * @notationName: the name of the notation + * + * What to do when an unparsed entity declaration is parsed. + */ +typedef void (*unparsedEntityDeclSAXFunc)(void *ctx, + const xmlChar *name, + const xmlChar *publicId, + const xmlChar *systemId, + const xmlChar *notationName); +/** + * setDocumentLocatorSAXFunc: + * @ctx: the user data (XML parser context) + * @loc: A SAX Locator + * + * Receive the document locator at startup, actually xmlDefaultSAXLocator. + * Everything is available on the context, so this is useless in our case. + */ +typedef void (*setDocumentLocatorSAXFunc) (void *ctx, + xmlSAXLocatorPtr loc); +/** + * startDocumentSAXFunc: + * @ctx: the user data (XML parser context) + * + * Called when the document start being processed. + */ +typedef void (*startDocumentSAXFunc) (void *ctx); +/** + * endDocumentSAXFunc: + * @ctx: the user data (XML parser context) + * + * Called when the document end has been detected. + */ +typedef void (*endDocumentSAXFunc) (void *ctx); +/** + * startElementSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The element name, including namespace prefix + * @atts: An array of name/value attributes pairs, NULL terminated + * + * Called when an opening tag has been processed. + */ +typedef void (*startElementSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar **atts); +/** + * endElementSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The element name + * + * Called when the end of an element has been detected. + */ +typedef void (*endElementSAXFunc) (void *ctx, + const xmlChar *name); +/** + * attributeSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The attribute name, including namespace prefix + * @value: The attribute value + * + * Handle an attribute that has been read by the parser. + * The default handling is to convert the attribute into an + * DOM subtree and past it in a new xmlAttr element added to + * the element. + */ +typedef void (*attributeSAXFunc) (void *ctx, + const xmlChar *name, + const xmlChar *value); +/** + * referenceSAXFunc: + * @ctx: the user data (XML parser context) + * @name: The entity name + * + * Called when an entity reference is detected. + */ +typedef void (*referenceSAXFunc) (void *ctx, + const xmlChar *name); +/** + * charactersSAXFunc: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * Receiving some chars from the parser. + */ +typedef void (*charactersSAXFunc) (void *ctx, + const xmlChar *ch, + int len); +/** + * ignorableWhitespaceSAXFunc: + * @ctx: the user data (XML parser context) + * @ch: a xmlChar string + * @len: the number of xmlChar + * + * Receiving some ignorable whitespaces from the parser. + * UNUSED: by default the DOM building will use characters. + */ +typedef void (*ignorableWhitespaceSAXFunc) (void *ctx, + const xmlChar *ch, + int len); +/** + * processingInstructionSAXFunc: + * @ctx: the user data (XML parser context) + * @target: the target name + * @data: the PI data's + * + * A processing instruction has been parsed. + */ +typedef void (*processingInstructionSAXFunc) (void *ctx, + const xmlChar *target, + const xmlChar *data); +/** + * commentSAXFunc: + * @ctx: the user data (XML parser context) + * @value: the comment content + * + * A comment has been parsed. + */ +typedef void (*commentSAXFunc) (void *ctx, + const xmlChar *value); +/** + * cdataBlockSAXFunc: + * @ctx: the user data (XML parser context) + * @value: The pcdata content + * @len: the block length + * + * Called when a pcdata block has been parsed. + */ +typedef void (*cdataBlockSAXFunc) ( + void *ctx, + const xmlChar *value, + int len); +/** + * warningSAXFunc: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format a warning messages, callback. + */ +typedef void (*warningSAXFunc) (void *ctx, + const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); +/** + * errorSAXFunc: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format an error messages, callback. + */ +typedef void (*errorSAXFunc) (void *ctx, + const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); +/** + * fatalErrorSAXFunc: + * @ctx: an XML parser context + * @msg: the message to display/transmit + * @...: extra parameters for the message display + * + * Display and format fatal error messages, callback. + * Note: so far fatalError() SAX callbacks are not used, error() + * get all the callbacks for errors. + */ +typedef void (*fatalErrorSAXFunc) (void *ctx, + const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); +/** + * isStandaloneSAXFunc: + * @ctx: the user data (XML parser context) + * + * Is this document tagged standalone? + * + * Returns 1 if true + */ +typedef int (*isStandaloneSAXFunc) (void *ctx); +/** + * hasInternalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * + * Does this document has an internal subset. + * + * Returns 1 if true + */ +typedef int (*hasInternalSubsetSAXFunc) (void *ctx); + +/** + * hasExternalSubsetSAXFunc: + * @ctx: the user data (XML parser context) + * + * Does this document has an external subset? + * + * Returns 1 if true + */ +typedef int (*hasExternalSubsetSAXFunc) (void *ctx); + +/************************************************************************ + * * + * The SAX version 2 API extensions * + * * + ************************************************************************/ +/** + * XML_SAX2_MAGIC: + * + * Special constant found in SAX2 blocks initialized fields + */ +#define XML_SAX2_MAGIC 0xDEEDBEAF + +/** + * startElementNsSAX2Func: + * @ctx: the user data (XML parser context) + * @localname: the local name of the element + * @prefix: the element namespace prefix if available + * @URI: the element namespace name if available + * @nb_namespaces: number of namespace definitions on that node + * @namespaces: pointer to the array of prefix/URI pairs namespace definitions + * @nb_attributes: the number of attributes on that node + * @nb_defaulted: the number of defaulted attributes. The defaulted + * ones are at the end of the array + * @attributes: pointer to the array of (localname/prefix/URI/value/end) + * attribute values. + * + * SAX2 callback when an element start has been detected by the parser. + * It provides the namespace information for the element, as well as + * the new namespace declarations on the element. + */ + +typedef void (*startElementNsSAX2Func) (void *ctx, + const xmlChar *localname, + const xmlChar *prefix, + const xmlChar *URI, + int nb_namespaces, + const xmlChar **namespaces, + int nb_attributes, + int nb_defaulted, + const xmlChar **attributes); + +/** + * endElementNsSAX2Func: + * @ctx: the user data (XML parser context) + * @localname: the local name of the element + * @prefix: the element namespace prefix if available + * @URI: the element namespace name if available + * + * SAX2 callback when an element end has been detected by the parser. + * It provides the namespace information for the element. + */ + +typedef void (*endElementNsSAX2Func) (void *ctx, + const xmlChar *localname, + const xmlChar *prefix, + const xmlChar *URI); + + +struct _xmlSAXHandler { + internalSubsetSAXFunc internalSubset; + isStandaloneSAXFunc isStandalone; + hasInternalSubsetSAXFunc hasInternalSubset; + hasExternalSubsetSAXFunc hasExternalSubset; + resolveEntitySAXFunc resolveEntity; + getEntitySAXFunc getEntity; + entityDeclSAXFunc entityDecl; + notationDeclSAXFunc notationDecl; + attributeDeclSAXFunc attributeDecl; + elementDeclSAXFunc elementDecl; + unparsedEntityDeclSAXFunc unparsedEntityDecl; + setDocumentLocatorSAXFunc setDocumentLocator; + startDocumentSAXFunc startDocument; + endDocumentSAXFunc endDocument; + /* + * `startElement` and `endElement` are only used by the legacy SAX1 + * interface and should not be used in new software. If you really + * have to enable SAX1, the preferred way is set the `initialized` + * member to 1 instead of XML_SAX2_MAGIC. + * + * For backward compatibility, it's also possible to set the + * `startElementNs` and `endElementNs` handlers to NULL. + * + * You can also set the XML_PARSE_SAX1 parser option, but versions + * older than 2.12.0 will probably crash if this option is provided + * together with custom SAX callbacks. + */ + startElementSAXFunc startElement; + endElementSAXFunc endElement; + referenceSAXFunc reference; + charactersSAXFunc characters; + ignorableWhitespaceSAXFunc ignorableWhitespace; + processingInstructionSAXFunc processingInstruction; + commentSAXFunc comment; + warningSAXFunc warning; + errorSAXFunc error; + fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ + getParameterEntitySAXFunc getParameterEntity; + cdataBlockSAXFunc cdataBlock; + externalSubsetSAXFunc externalSubset; + /* + * `initialized` should always be set to XML_SAX2_MAGIC to enable the + * modern SAX2 interface. + */ + unsigned int initialized; + /* + * The following members are only used by the SAX2 interface. + */ + void *_private; + startElementNsSAX2Func startElementNs; + endElementNsSAX2Func endElementNs; + xmlStructuredErrorFunc serror; +}; + +/* + * SAX Version 1 + */ +typedef struct _xmlSAXHandlerV1 xmlSAXHandlerV1; +typedef xmlSAXHandlerV1 *xmlSAXHandlerV1Ptr; +struct _xmlSAXHandlerV1 { + internalSubsetSAXFunc internalSubset; + isStandaloneSAXFunc isStandalone; + hasInternalSubsetSAXFunc hasInternalSubset; + hasExternalSubsetSAXFunc hasExternalSubset; + resolveEntitySAXFunc resolveEntity; + getEntitySAXFunc getEntity; + entityDeclSAXFunc entityDecl; + notationDeclSAXFunc notationDecl; + attributeDeclSAXFunc attributeDecl; + elementDeclSAXFunc elementDecl; + unparsedEntityDeclSAXFunc unparsedEntityDecl; + setDocumentLocatorSAXFunc setDocumentLocator; + startDocumentSAXFunc startDocument; + endDocumentSAXFunc endDocument; + startElementSAXFunc startElement; + endElementSAXFunc endElement; + referenceSAXFunc reference; + charactersSAXFunc characters; + ignorableWhitespaceSAXFunc ignorableWhitespace; + processingInstructionSAXFunc processingInstruction; + commentSAXFunc comment; + warningSAXFunc warning; + errorSAXFunc error; + fatalErrorSAXFunc fatalError; /* unused error() get all the errors */ + getParameterEntitySAXFunc getParameterEntity; + cdataBlockSAXFunc cdataBlock; + externalSubsetSAXFunc externalSubset; + unsigned int initialized; +}; + + +/** + * xmlExternalEntityLoader: + * @URL: The System ID of the resource requested + * @ID: The Public ID of the resource requested + * @context: the XML parser context + * + * External entity loaders types. + * + * Returns the entity input parser. + */ +typedef xmlParserInputPtr (*xmlExternalEntityLoader) (const char *URL, + const char *ID, + xmlParserCtxtPtr context); + +/* + * Variables + */ + +XMLPUBVAR const char *const xmlParserVersion; +#ifdef LIBXML_THREAD_ENABLED +/* backward compatibility */ +XMLPUBFUN const char *const *__xmlParserVersion(void); +#endif + +/** DOC_DISABLE */ +#define XML_GLOBALS_PARSER_CORE \ + XML_OP(oldXMLWDcompatibility, int, XML_DEPRECATED) \ + XML_OP(xmlDefaultSAXLocator, xmlSAXLocator, XML_DEPRECATED) \ + XML_OP(xmlDoValidityCheckingDefaultValue, int, XML_DEPRECATED) \ + XML_OP(xmlGetWarningsDefaultValue, int, XML_DEPRECATED) \ + XML_OP(xmlKeepBlanksDefaultValue, int, XML_DEPRECATED) \ + XML_OP(xmlLineNumbersDefaultValue, int, XML_DEPRECATED) \ + XML_OP(xmlLoadExtDtdDefaultValue, int, XML_DEPRECATED) \ + XML_OP(xmlParserDebugEntities, int, XML_DEPRECATED) \ + XML_OP(xmlPedanticParserDefaultValue, int, XML_DEPRECATED) \ + XML_OP(xmlSubstituteEntitiesDefaultValue, int, XML_DEPRECATED) + +#ifdef LIBXML_OUTPUT_ENABLED + #define XML_GLOBALS_PARSER_OUTPUT \ + XML_OP(xmlIndentTreeOutput, int, XML_NO_ATTR) \ + XML_OP(xmlTreeIndentString, const char *, XML_NO_ATTR) \ + XML_OP(xmlSaveNoEmptyTags, int, XML_NO_ATTR) +#else + #define XML_GLOBALS_PARSER_OUTPUT +#endif + +#ifdef LIBXML_SAX1_ENABLED + #define XML_GLOBALS_PARSER_SAX1 \ + XML_OP(xmlDefaultSAXHandler, xmlSAXHandlerV1, XML_DEPRECATED) +#else + #define XML_GLOBALS_PARSER_SAX1 +#endif + +#define XML_GLOBALS_PARSER \ + XML_GLOBALS_PARSER_CORE \ + XML_GLOBALS_PARSER_OUTPUT \ + XML_GLOBALS_PARSER_SAX1 + +#define XML_OP XML_DECLARE_GLOBAL +XML_GLOBALS_PARSER +#undef XML_OP + +#if defined(LIBXML_THREAD_ENABLED) && !defined(XML_GLOBALS_NO_REDEFINITION) + #define oldXMLWDcompatibility XML_GLOBAL_MACRO(oldXMLWDcompatibility) + #define xmlDefaultSAXHandler XML_GLOBAL_MACRO(xmlDefaultSAXHandler) + #define xmlDefaultSAXLocator XML_GLOBAL_MACRO(xmlDefaultSAXLocator) + #define xmlDoValidityCheckingDefaultValue \ + XML_GLOBAL_MACRO(xmlDoValidityCheckingDefaultValue) + #define xmlGetWarningsDefaultValue \ + XML_GLOBAL_MACRO(xmlGetWarningsDefaultValue) + #define xmlKeepBlanksDefaultValue XML_GLOBAL_MACRO(xmlKeepBlanksDefaultValue) + #define xmlLineNumbersDefaultValue \ + XML_GLOBAL_MACRO(xmlLineNumbersDefaultValue) + #define xmlLoadExtDtdDefaultValue XML_GLOBAL_MACRO(xmlLoadExtDtdDefaultValue) + #define xmlParserDebugEntities XML_GLOBAL_MACRO(xmlParserDebugEntities) + #define xmlPedanticParserDefaultValue \ + XML_GLOBAL_MACRO(xmlPedanticParserDefaultValue) + #define xmlSubstituteEntitiesDefaultValue \ + XML_GLOBAL_MACRO(xmlSubstituteEntitiesDefaultValue) + #ifdef LIBXML_OUTPUT_ENABLED + #define xmlIndentTreeOutput XML_GLOBAL_MACRO(xmlIndentTreeOutput) + #define xmlTreeIndentString XML_GLOBAL_MACRO(xmlTreeIndentString) + #define xmlSaveNoEmptyTags XML_GLOBAL_MACRO(xmlSaveNoEmptyTags) + #endif +#endif +/** DOC_ENABLE */ + +/* + * Init/Cleanup + */ +XMLPUBFUN void + xmlInitParser (void); +XMLPUBFUN void + xmlCleanupParser (void); +XML_DEPRECATED +XMLPUBFUN void + xmlInitGlobals (void); +XML_DEPRECATED +XMLPUBFUN void + xmlCleanupGlobals (void); + +/* + * Input functions + */ +XML_DEPRECATED +XMLPUBFUN int + xmlParserInputRead (xmlParserInputPtr in, + int len); +XML_DEPRECATED +XMLPUBFUN int + xmlParserInputGrow (xmlParserInputPtr in, + int len); + +/* + * Basic parsing Interfaces + */ +#ifdef LIBXML_SAX1_ENABLED +XMLPUBFUN xmlDocPtr + xmlParseDoc (const xmlChar *cur); +XMLPUBFUN xmlDocPtr + xmlParseFile (const char *filename); +XMLPUBFUN xmlDocPtr + xmlParseMemory (const char *buffer, + int size); +#endif /* LIBXML_SAX1_ENABLED */ +XML_DEPRECATED XMLPUBFUN int + xmlSubstituteEntitiesDefault(int val); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefSubstituteEntitiesDefaultValue(int v); +XMLPUBFUN int + xmlKeepBlanksDefault (int val); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefKeepBlanksDefaultValue(int v); +XMLPUBFUN void + xmlStopParser (xmlParserCtxtPtr ctxt); +XML_DEPRECATED XMLPUBFUN int + xmlPedanticParserDefault(int val); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefPedanticParserDefaultValue(int v); +XML_DEPRECATED XMLPUBFUN int + xmlLineNumbersDefault (int val); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefLineNumbersDefaultValue(int v); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefDoValidityCheckingDefaultValue(int v); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefGetWarningsDefaultValue(int v); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefLoadExtDtdDefaultValue(int v); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefParserDebugEntities(int v); + +#ifdef LIBXML_SAX1_ENABLED +/* + * Recovery mode + */ +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlRecoverDoc (const xmlChar *cur); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlRecoverMemory (const char *buffer, + int size); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlRecoverFile (const char *filename); +#endif /* LIBXML_SAX1_ENABLED */ + +/* + * Less common routines and SAX interfaces + */ +XMLPUBFUN int + xmlParseDocument (xmlParserCtxtPtr ctxt); +XMLPUBFUN int + xmlParseExtParsedEnt (xmlParserCtxtPtr ctxt); +#ifdef LIBXML_SAX1_ENABLED +XML_DEPRECATED +XMLPUBFUN int + xmlSAXUserParseFile (xmlSAXHandlerPtr sax, + void *user_data, + const char *filename); +XML_DEPRECATED +XMLPUBFUN int + xmlSAXUserParseMemory (xmlSAXHandlerPtr sax, + void *user_data, + const char *buffer, + int size); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlSAXParseDoc (xmlSAXHandlerPtr sax, + const xmlChar *cur, + int recovery); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlSAXParseMemory (xmlSAXHandlerPtr sax, + const char *buffer, + int size, + int recovery); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlSAXParseMemoryWithData (xmlSAXHandlerPtr sax, + const char *buffer, + int size, + int recovery, + void *data); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlSAXParseFile (xmlSAXHandlerPtr sax, + const char *filename, + int recovery); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlSAXParseFileWithData (xmlSAXHandlerPtr sax, + const char *filename, + int recovery, + void *data); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlSAXParseEntity (xmlSAXHandlerPtr sax, + const char *filename); +XML_DEPRECATED +XMLPUBFUN xmlDocPtr + xmlParseEntity (const char *filename); +#endif /* LIBXML_SAX1_ENABLED */ + +#ifdef LIBXML_VALID_ENABLED +XML_DEPRECATED +XMLPUBFUN xmlDtdPtr + xmlSAXParseDTD (xmlSAXHandlerPtr sax, + const xmlChar *ExternalID, + const xmlChar *SystemID); +XMLPUBFUN xmlDtdPtr + xmlParseDTD (const xmlChar *ExternalID, + const xmlChar *SystemID); +XMLPUBFUN xmlDtdPtr + xmlIOParseDTD (xmlSAXHandlerPtr sax, + xmlParserInputBufferPtr input, + xmlCharEncoding enc); +#endif /* LIBXML_VALID_ENABLE */ +#ifdef LIBXML_SAX1_ENABLED +XMLPUBFUN int + xmlParseBalancedChunkMemory(xmlDocPtr doc, + xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *string, + xmlNodePtr *lst); +#endif /* LIBXML_SAX1_ENABLED */ +XMLPUBFUN xmlParserErrors + xmlParseInNodeContext (xmlNodePtr node, + const char *data, + int datalen, + int options, + xmlNodePtr *lst); +#ifdef LIBXML_SAX1_ENABLED +XMLPUBFUN int + xmlParseBalancedChunkMemoryRecover(xmlDocPtr doc, + xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *string, + xmlNodePtr *lst, + int recover); +XML_DEPRECATED +XMLPUBFUN int + xmlParseExternalEntity (xmlDocPtr doc, + xmlSAXHandlerPtr sax, + void *user_data, + int depth, + const xmlChar *URL, + const xmlChar *ID, + xmlNodePtr *lst); +#endif /* LIBXML_SAX1_ENABLED */ +XMLPUBFUN int + xmlParseCtxtExternalEntity(xmlParserCtxtPtr ctx, + const xmlChar *URL, + const xmlChar *ID, + xmlNodePtr *lst); + +/* + * Parser contexts handling. + */ +XMLPUBFUN xmlParserCtxtPtr + xmlNewParserCtxt (void); +XMLPUBFUN xmlParserCtxtPtr + xmlNewSAXParserCtxt (const xmlSAXHandler *sax, + void *userData); +XMLPUBFUN int + xmlInitParserCtxt (xmlParserCtxtPtr ctxt); +XMLPUBFUN void + xmlClearParserCtxt (xmlParserCtxtPtr ctxt); +XMLPUBFUN void + xmlFreeParserCtxt (xmlParserCtxtPtr ctxt); +#ifdef LIBXML_SAX1_ENABLED +XML_DEPRECATED +XMLPUBFUN void + xmlSetupParserForBuffer (xmlParserCtxtPtr ctxt, + const xmlChar* buffer, + const char *filename); +#endif /* LIBXML_SAX1_ENABLED */ +XMLPUBFUN xmlParserCtxtPtr + xmlCreateDocParserCtxt (const xmlChar *cur); + +#ifdef LIBXML_LEGACY_ENABLED +/* + * Reading/setting optional parsing features. + */ +XML_DEPRECATED +XMLPUBFUN int + xmlGetFeaturesList (int *len, + const char **result); +XML_DEPRECATED +XMLPUBFUN int + xmlGetFeature (xmlParserCtxtPtr ctxt, + const char *name, + void *result); +XML_DEPRECATED +XMLPUBFUN int + xmlSetFeature (xmlParserCtxtPtr ctxt, + const char *name, + void *value); +#endif /* LIBXML_LEGACY_ENABLED */ + +#ifdef LIBXML_PUSH_ENABLED +/* + * Interfaces for the Push mode. + */ +XMLPUBFUN xmlParserCtxtPtr + xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, + void *user_data, + const char *chunk, + int size, + const char *filename); +XMLPUBFUN int + xmlParseChunk (xmlParserCtxtPtr ctxt, + const char *chunk, + int size, + int terminate); +#endif /* LIBXML_PUSH_ENABLED */ + +/* + * Special I/O mode. + */ + +XMLPUBFUN xmlParserCtxtPtr + xmlCreateIOParserCtxt (xmlSAXHandlerPtr sax, + void *user_data, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + xmlCharEncoding enc); + +XMLPUBFUN xmlParserInputPtr + xmlNewIOInputStream (xmlParserCtxtPtr ctxt, + xmlParserInputBufferPtr input, + xmlCharEncoding enc); + +/* + * Node infos. + */ +XMLPUBFUN const xmlParserNodeInfo* + xmlParserFindNodeInfo (const xmlParserCtxtPtr ctxt, + const xmlNodePtr node); +XMLPUBFUN void + xmlInitNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); +XMLPUBFUN void + xmlClearNodeInfoSeq (xmlParserNodeInfoSeqPtr seq); +XMLPUBFUN unsigned long + xmlParserFindNodeInfoIndex(const xmlParserNodeInfoSeqPtr seq, + const xmlNodePtr node); +XMLPUBFUN void + xmlParserAddNodeInfo (xmlParserCtxtPtr ctxt, + const xmlParserNodeInfoPtr info); + +/* + * External entities handling actually implemented in xmlIO. + */ + +XMLPUBFUN void + xmlSetExternalEntityLoader(xmlExternalEntityLoader f); +XMLPUBFUN xmlExternalEntityLoader + xmlGetExternalEntityLoader(void); +XMLPUBFUN xmlParserInputPtr + xmlLoadExternalEntity (const char *URL, + const char *ID, + xmlParserCtxtPtr ctxt); + +/* + * Index lookup, actually implemented in the encoding module + */ +XMLPUBFUN long + xmlByteConsumed (xmlParserCtxtPtr ctxt); + +/* + * New set of simpler/more flexible APIs + */ +/** + * xmlParserOption: + * + * This is the set of XML parser options that can be passed down + * to the xmlReadDoc() and similar calls. + */ +typedef enum { + XML_PARSE_RECOVER = 1<<0, /* recover on errors */ + XML_PARSE_NOENT = 1<<1, /* substitute entities */ + XML_PARSE_DTDLOAD = 1<<2, /* load the external subset */ + XML_PARSE_DTDATTR = 1<<3, /* default DTD attributes */ + XML_PARSE_DTDVALID = 1<<4, /* validate with the DTD */ + XML_PARSE_NOERROR = 1<<5, /* suppress error reports */ + XML_PARSE_NOWARNING = 1<<6, /* suppress warning reports */ + XML_PARSE_PEDANTIC = 1<<7, /* pedantic error reporting */ + XML_PARSE_NOBLANKS = 1<<8, /* remove blank nodes */ + XML_PARSE_SAX1 = 1<<9, /* use the SAX1 interface internally */ + XML_PARSE_XINCLUDE = 1<<10,/* Implement XInclude substitution */ + XML_PARSE_NONET = 1<<11,/* Forbid network access */ + XML_PARSE_NODICT = 1<<12,/* Do not reuse the context dictionary */ + XML_PARSE_NSCLEAN = 1<<13,/* remove redundant namespaces declarations */ + XML_PARSE_NOCDATA = 1<<14,/* merge CDATA as text nodes */ + XML_PARSE_NOXINCNODE= 1<<15,/* do not generate XINCLUDE START/END nodes */ + XML_PARSE_COMPACT = 1<<16,/* compact small text nodes; no modification of + the tree allowed afterwards (will possibly + crash if you try to modify the tree) */ + XML_PARSE_OLD10 = 1<<17,/* parse using XML-1.0 before update 5 */ + XML_PARSE_NOBASEFIX = 1<<18,/* do not fixup XINCLUDE xml:base uris */ + XML_PARSE_HUGE = 1<<19,/* relax any hardcoded limit from the parser */ + XML_PARSE_OLDSAX = 1<<20,/* parse using SAX2 interface before 2.7.0 */ + XML_PARSE_IGNORE_ENC= 1<<21,/* ignore internal document encoding hint */ + XML_PARSE_BIG_LINES = 1<<22 /* Store big lines numbers in text PSVI field */ +} xmlParserOption; + +XMLPUBFUN void + xmlCtxtReset (xmlParserCtxtPtr ctxt); +XMLPUBFUN int + xmlCtxtResetPush (xmlParserCtxtPtr ctxt, + const char *chunk, + int size, + const char *filename, + const char *encoding); +XMLPUBFUN int + xmlCtxtUseOptions (xmlParserCtxtPtr ctxt, + int options); +XMLPUBFUN void + xmlCtxtSetMaxAmplification(xmlParserCtxtPtr ctxt, + unsigned maxAmpl); +XMLPUBFUN xmlDocPtr + xmlReadDoc (const xmlChar *cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlReadFile (const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlReadMemory (const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlReadFd (int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlReadIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlCtxtReadDoc (xmlParserCtxtPtr ctxt, + const xmlChar *cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlCtxtReadFile (xmlParserCtxtPtr ctxt, + const char *filename, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlCtxtReadMemory (xmlParserCtxtPtr ctxt, + const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlCtxtReadFd (xmlParserCtxtPtr ctxt, + int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlDocPtr + xmlCtxtReadIO (xmlParserCtxtPtr ctxt, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); + +/* + * Library wide options + */ +/** + * xmlFeature: + * + * Used to examine the existence of features that can be enabled + * or disabled at compile-time. + * They used to be called XML_FEATURE_xxx but this clashed with Expat + */ +typedef enum { + XML_WITH_THREAD = 1, + XML_WITH_TREE = 2, + XML_WITH_OUTPUT = 3, + XML_WITH_PUSH = 4, + XML_WITH_READER = 5, + XML_WITH_PATTERN = 6, + XML_WITH_WRITER = 7, + XML_WITH_SAX1 = 8, + XML_WITH_FTP = 9, + XML_WITH_HTTP = 10, + XML_WITH_VALID = 11, + XML_WITH_HTML = 12, + XML_WITH_LEGACY = 13, + XML_WITH_C14N = 14, + XML_WITH_CATALOG = 15, + XML_WITH_XPATH = 16, + XML_WITH_XPTR = 17, + XML_WITH_XINCLUDE = 18, + XML_WITH_ICONV = 19, + XML_WITH_ISO8859X = 20, + XML_WITH_UNICODE = 21, + XML_WITH_REGEXP = 22, + XML_WITH_AUTOMATA = 23, + XML_WITH_EXPR = 24, + XML_WITH_SCHEMAS = 25, + XML_WITH_SCHEMATRON = 26, + XML_WITH_MODULES = 27, + XML_WITH_DEBUG = 28, + XML_WITH_DEBUG_MEM = 29, + XML_WITH_DEBUG_RUN = 30, + XML_WITH_ZLIB = 31, + XML_WITH_ICU = 32, + XML_WITH_LZMA = 33, + XML_WITH_NONE = 99999 /* just to be sure of allocation size */ +} xmlFeature; + +XMLPUBFUN int + xmlHasFeature (xmlFeature feature); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_PARSER_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/parserInternals.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/parserInternals.h new file mode 100644 index 00000000..017ed273 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/parserInternals.h @@ -0,0 +1,663 @@ +/* + * Summary: internals routines and limits exported by the parser. + * Description: this module exports a number of internal parsing routines + * they are not really all intended for applications but + * can prove useful doing low level processing. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_PARSER_INTERNALS_H__ +#define __XML_PARSER_INTERNALS_H__ + +#include <libxml/xmlversion.h> +#include <libxml/parser.h> +#include <libxml/HTMLparser.h> +#include <libxml/chvalid.h> +#include <libxml/SAX2.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlParserMaxDepth: + * + * arbitrary depth limit for the XML documents that we allow to + * process. This is not a limitation of the parser but a safety + * boundary feature, use XML_PARSE_HUGE option to override it. + */ +XMLPUBVAR unsigned int xmlParserMaxDepth; + +/** + * XML_MAX_TEXT_LENGTH: + * + * Maximum size allowed for a single text node when building a tree. + * This is not a limitation of the parser but a safety boundary feature, + * use XML_PARSE_HUGE option to override it. + * Introduced in 2.9.0 + */ +#define XML_MAX_TEXT_LENGTH 10000000 + +/** + * XML_MAX_HUGE_LENGTH: + * + * Maximum size allowed when XML_PARSE_HUGE is set. + */ +#define XML_MAX_HUGE_LENGTH 1000000000 + +/** + * XML_MAX_NAME_LENGTH: + * + * Maximum size allowed for a markup identifier. + * This is not a limitation of the parser but a safety boundary feature, + * use XML_PARSE_HUGE option to override it. + * Note that with the use of parsing dictionaries overriding the limit + * may result in more runtime memory usage in face of "unfriendly' content + * Introduced in 2.9.0 + */ +#define XML_MAX_NAME_LENGTH 50000 + +/** + * XML_MAX_DICTIONARY_LIMIT: + * + * Maximum size allowed by the parser for a dictionary by default + * This is not a limitation of the parser but a safety boundary feature, + * use XML_PARSE_HUGE option to override it. + * Introduced in 2.9.0 + */ +#define XML_MAX_DICTIONARY_LIMIT 10000000 + +/** + * XML_MAX_LOOKUP_LIMIT: + * + * Maximum size allowed by the parser for ahead lookup + * This is an upper boundary enforced by the parser to avoid bad + * behaviour on "unfriendly' content + * Introduced in 2.9.0 + */ +#define XML_MAX_LOOKUP_LIMIT 10000000 + +/** + * XML_MAX_NAMELEN: + * + * Identifiers can be longer, but this will be more costly + * at runtime. + */ +#define XML_MAX_NAMELEN 100 + +/** + * INPUT_CHUNK: + * + * The parser tries to always have that amount of input ready. + * One of the point is providing context when reporting errors. + */ +#define INPUT_CHUNK 250 + +/************************************************************************ + * * + * UNICODE version of the macros. * + * * + ************************************************************************/ +/** + * IS_BYTE_CHAR: + * @c: an byte value (int) + * + * Macro to check the following production in the XML spec: + * + * [2] Char ::= #x9 | #xA | #xD | [#x20...] + * any byte character in the accepted range + */ +#define IS_BYTE_CHAR(c) xmlIsChar_ch(c) + +/** + * IS_CHAR: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] + * | [#x10000-#x10FFFF] + * any Unicode character, excluding the surrogate blocks, FFFE, and FFFF. + */ +#define IS_CHAR(c) xmlIsCharQ(c) + +/** + * IS_CHAR_CH: + * @c: an xmlChar (usually an unsigned char) + * + * Behaves like IS_CHAR on single-byte value + */ +#define IS_CHAR_CH(c) xmlIsChar_ch(c) + +/** + * IS_BLANK: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [3] S ::= (#x20 | #x9 | #xD | #xA)+ + */ +#define IS_BLANK(c) xmlIsBlankQ(c) + +/** + * IS_BLANK_CH: + * @c: an xmlChar value (normally unsigned char) + * + * Behaviour same as IS_BLANK + */ +#define IS_BLANK_CH(c) xmlIsBlank_ch(c) + +/** + * IS_BASECHAR: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [85] BaseChar ::= ... long list see REC ... + */ +#define IS_BASECHAR(c) xmlIsBaseCharQ(c) + +/** + * IS_DIGIT: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [88] Digit ::= ... long list see REC ... + */ +#define IS_DIGIT(c) xmlIsDigitQ(c) + +/** + * IS_DIGIT_CH: + * @c: an xmlChar value (usually an unsigned char) + * + * Behaves like IS_DIGIT but with a single byte argument + */ +#define IS_DIGIT_CH(c) xmlIsDigit_ch(c) + +/** + * IS_COMBINING: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * [87] CombiningChar ::= ... long list see REC ... + */ +#define IS_COMBINING(c) xmlIsCombiningQ(c) + +/** + * IS_COMBINING_CH: + * @c: an xmlChar (usually an unsigned char) + * + * Always false (all combining chars > 0xff) + */ +#define IS_COMBINING_CH(c) 0 + +/** + * IS_EXTENDER: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [89] Extender ::= #x00B7 | #x02D0 | #x02D1 | #x0387 | #x0640 | + * #x0E46 | #x0EC6 | #x3005 | [#x3031-#x3035] | + * [#x309D-#x309E] | [#x30FC-#x30FE] + */ +#define IS_EXTENDER(c) xmlIsExtenderQ(c) + +/** + * IS_EXTENDER_CH: + * @c: an xmlChar value (usually an unsigned char) + * + * Behaves like IS_EXTENDER but with a single-byte argument + */ +#define IS_EXTENDER_CH(c) xmlIsExtender_ch(c) + +/** + * IS_IDEOGRAPHIC: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [86] Ideographic ::= [#x4E00-#x9FA5] | #x3007 | [#x3021-#x3029] + */ +#define IS_IDEOGRAPHIC(c) xmlIsIdeographicQ(c) + +/** + * IS_LETTER: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [84] Letter ::= BaseChar | Ideographic + */ +#define IS_LETTER(c) (IS_BASECHAR(c) || IS_IDEOGRAPHIC(c)) + +/** + * IS_LETTER_CH: + * @c: an xmlChar value (normally unsigned char) + * + * Macro behaves like IS_LETTER, but only check base chars + * + */ +#define IS_LETTER_CH(c) xmlIsBaseChar_ch(c) + +/** + * IS_ASCII_LETTER: + * @c: an xmlChar value + * + * Macro to check [a-zA-Z] + * + */ +#define IS_ASCII_LETTER(c) (((0x41 <= (c)) && ((c) <= 0x5a)) || \ + ((0x61 <= (c)) && ((c) <= 0x7a))) + +/** + * IS_ASCII_DIGIT: + * @c: an xmlChar value + * + * Macro to check [0-9] + * + */ +#define IS_ASCII_DIGIT(c) ((0x30 <= (c)) && ((c) <= 0x39)) + +/** + * IS_PUBIDCHAR: + * @c: an UNICODE value (int) + * + * Macro to check the following production in the XML spec: + * + * + * [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] + */ +#define IS_PUBIDCHAR(c) xmlIsPubidCharQ(c) + +/** + * IS_PUBIDCHAR_CH: + * @c: an xmlChar value (normally unsigned char) + * + * Same as IS_PUBIDCHAR but for single-byte value + */ +#define IS_PUBIDCHAR_CH(c) xmlIsPubidChar_ch(c) + +/** + * Global variables used for predefined strings. + */ +XMLPUBVAR const xmlChar xmlStringText[]; +XMLPUBVAR const xmlChar xmlStringTextNoenc[]; +XMLPUBVAR const xmlChar xmlStringComment[]; + +/* + * Function to finish the work of the macros where needed. + */ +XMLPUBFUN int xmlIsLetter (int c); + +/** + * Parser context. + */ +XMLPUBFUN xmlParserCtxtPtr + xmlCreateFileParserCtxt (const char *filename); +XMLPUBFUN xmlParserCtxtPtr + xmlCreateURLParserCtxt (const char *filename, + int options); +XMLPUBFUN xmlParserCtxtPtr + xmlCreateMemoryParserCtxt(const char *buffer, + int size); +XMLPUBFUN xmlParserCtxtPtr + xmlCreateEntityParserCtxt(const xmlChar *URL, + const xmlChar *ID, + const xmlChar *base); +XMLPUBFUN int + xmlSwitchEncoding (xmlParserCtxtPtr ctxt, + xmlCharEncoding enc); +XMLPUBFUN int + xmlSwitchToEncoding (xmlParserCtxtPtr ctxt, + xmlCharEncodingHandlerPtr handler); +XML_DEPRECATED +XMLPUBFUN int + xmlSwitchInputEncoding (xmlParserCtxtPtr ctxt, + xmlParserInputPtr input, + xmlCharEncodingHandlerPtr handler); + +/** + * Input Streams. + */ +XMLPUBFUN xmlParserInputPtr + xmlNewStringInputStream (xmlParserCtxtPtr ctxt, + const xmlChar *buffer); +XML_DEPRECATED +XMLPUBFUN xmlParserInputPtr + xmlNewEntityInputStream (xmlParserCtxtPtr ctxt, + xmlEntityPtr entity); +XMLPUBFUN int + xmlPushInput (xmlParserCtxtPtr ctxt, + xmlParserInputPtr input); +XMLPUBFUN xmlChar + xmlPopInput (xmlParserCtxtPtr ctxt); +XMLPUBFUN void + xmlFreeInputStream (xmlParserInputPtr input); +XMLPUBFUN xmlParserInputPtr + xmlNewInputFromFile (xmlParserCtxtPtr ctxt, + const char *filename); +XMLPUBFUN xmlParserInputPtr + xmlNewInputStream (xmlParserCtxtPtr ctxt); + +/** + * Namespaces. + */ +XMLPUBFUN xmlChar * + xmlSplitQName (xmlParserCtxtPtr ctxt, + const xmlChar *name, + xmlChar **prefix); + +/** + * Generic production rules. + */ +XML_DEPRECATED +XMLPUBFUN const xmlChar * + xmlParseName (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseNmtoken (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseEntityValue (xmlParserCtxtPtr ctxt, + xmlChar **orig); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseAttValue (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseSystemLiteral (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParsePubidLiteral (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseCharData (xmlParserCtxtPtr ctxt, + int cdata); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseExternalID (xmlParserCtxtPtr ctxt, + xmlChar **publicID, + int strict); +XML_DEPRECATED +XMLPUBFUN void + xmlParseComment (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN const xmlChar * + xmlParsePITarget (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParsePI (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseNotationDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseEntityDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN int + xmlParseDefaultDecl (xmlParserCtxtPtr ctxt, + xmlChar **value); +XML_DEPRECATED +XMLPUBFUN xmlEnumerationPtr + xmlParseNotationType (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlEnumerationPtr + xmlParseEnumerationType (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN int + xmlParseEnumeratedType (xmlParserCtxtPtr ctxt, + xmlEnumerationPtr *tree); +XML_DEPRECATED +XMLPUBFUN int + xmlParseAttributeType (xmlParserCtxtPtr ctxt, + xmlEnumerationPtr *tree); +XML_DEPRECATED +XMLPUBFUN void + xmlParseAttributeListDecl(xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlElementContentPtr + xmlParseElementMixedContentDecl + (xmlParserCtxtPtr ctxt, + int inputchk); +XML_DEPRECATED +XMLPUBFUN xmlElementContentPtr + xmlParseElementChildrenContentDecl + (xmlParserCtxtPtr ctxt, + int inputchk); +XML_DEPRECATED +XMLPUBFUN int + xmlParseElementContentDecl(xmlParserCtxtPtr ctxt, + const xmlChar *name, + xmlElementContentPtr *result); +XML_DEPRECATED +XMLPUBFUN int + xmlParseElementDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseMarkupDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN int + xmlParseCharRef (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlEntityPtr + xmlParseEntityRef (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseReference (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParsePEReference (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseDocTypeDecl (xmlParserCtxtPtr ctxt); +#ifdef LIBXML_SAX1_ENABLED +XML_DEPRECATED +XMLPUBFUN const xmlChar * + xmlParseAttribute (xmlParserCtxtPtr ctxt, + xmlChar **value); +XML_DEPRECATED +XMLPUBFUN const xmlChar * + xmlParseStartTag (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseEndTag (xmlParserCtxtPtr ctxt); +#endif /* LIBXML_SAX1_ENABLED */ +XML_DEPRECATED +XMLPUBFUN void + xmlParseCDSect (xmlParserCtxtPtr ctxt); +XMLPUBFUN void + xmlParseContent (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseElement (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseVersionNum (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseVersionInfo (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseEncName (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN const xmlChar * + xmlParseEncodingDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN int + xmlParseSDDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseXMLDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseTextDecl (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseMisc (xmlParserCtxtPtr ctxt); +XMLPUBFUN void + xmlParseExternalSubset (xmlParserCtxtPtr ctxt, + const xmlChar *ExternalID, + const xmlChar *SystemID); +/** + * XML_SUBSTITUTE_NONE: + * + * If no entities need to be substituted. + */ +#define XML_SUBSTITUTE_NONE 0 +/** + * XML_SUBSTITUTE_REF: + * + * Whether general entities need to be substituted. + */ +#define XML_SUBSTITUTE_REF 1 +/** + * XML_SUBSTITUTE_PEREF: + * + * Whether parameter entities need to be substituted. + */ +#define XML_SUBSTITUTE_PEREF 2 +/** + * XML_SUBSTITUTE_BOTH: + * + * Both general and parameter entities need to be substituted. + */ +#define XML_SUBSTITUTE_BOTH 3 + +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlStringDecodeEntities (xmlParserCtxtPtr ctxt, + const xmlChar *str, + int what, + xmlChar end, + xmlChar end2, + xmlChar end3); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlStringLenDecodeEntities (xmlParserCtxtPtr ctxt, + const xmlChar *str, + int len, + int what, + xmlChar end, + xmlChar end2, + xmlChar end3); + +/* + * Generated by MACROS on top of parser.c c.f. PUSH_AND_POP. + */ +XML_DEPRECATED +XMLPUBFUN int nodePush (xmlParserCtxtPtr ctxt, + xmlNodePtr value); +XML_DEPRECATED +XMLPUBFUN xmlNodePtr nodePop (xmlParserCtxtPtr ctxt); +XMLPUBFUN int inputPush (xmlParserCtxtPtr ctxt, + xmlParserInputPtr value); +XMLPUBFUN xmlParserInputPtr inputPop (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN const xmlChar * namePop (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN int namePush (xmlParserCtxtPtr ctxt, + const xmlChar *value); + +/* + * other commodities shared between parser.c and parserInternals. + */ +XML_DEPRECATED +XMLPUBFUN int xmlSkipBlankChars (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN int xmlStringCurrentChar (xmlParserCtxtPtr ctxt, + const xmlChar *cur, + int *len); +XML_DEPRECATED +XMLPUBFUN void xmlParserHandlePEReference(xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN int xmlCheckLanguageID (const xmlChar *lang); + +/* + * Really core function shared with HTML parser. + */ +XML_DEPRECATED +XMLPUBFUN int xmlCurrentChar (xmlParserCtxtPtr ctxt, + int *len); +XMLPUBFUN int xmlCopyCharMultiByte (xmlChar *out, + int val); +XMLPUBFUN int xmlCopyChar (int len, + xmlChar *out, + int val); +XML_DEPRECATED +XMLPUBFUN void xmlNextChar (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void xmlParserInputShrink (xmlParserInputPtr in); + +/* + * Specific function to keep track of entities references + * and used by the XSLT debugger. + */ +#ifdef LIBXML_LEGACY_ENABLED +/** + * xmlEntityReferenceFunc: + * @ent: the entity + * @firstNode: the fist node in the chunk + * @lastNode: the last nod in the chunk + * + * Callback function used when one needs to be able to track back the + * provenance of a chunk of nodes inherited from an entity replacement. + */ +typedef void (*xmlEntityReferenceFunc) (xmlEntityPtr ent, + xmlNodePtr firstNode, + xmlNodePtr lastNode); + +XML_DEPRECATED +XMLPUBFUN void xmlSetEntityReferenceFunc (xmlEntityReferenceFunc func); + +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlParseQuotedString (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void + xmlParseNamespace (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlNamespaceParseNSDef (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlScanName (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlNamespaceParseNCName (xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN void xmlParserHandleReference(xmlParserCtxtPtr ctxt); +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlNamespaceParseQName (xmlParserCtxtPtr ctxt, + xmlChar **prefix); +/** + * Entities + */ +XML_DEPRECATED +XMLPUBFUN xmlChar * + xmlDecodeEntities (xmlParserCtxtPtr ctxt, + int len, + int what, + xmlChar end, + xmlChar end2, + xmlChar end3); +XML_DEPRECATED +XMLPUBFUN void + xmlHandleEntity (xmlParserCtxtPtr ctxt, + xmlEntityPtr entity); + +#endif /* LIBXML_LEGACY_ENABLED */ + +#ifdef __cplusplus +} +#endif +#endif /* __XML_PARSER_INTERNALS_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/relaxng.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/relaxng.h new file mode 100644 index 00000000..079b7f12 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/relaxng.h @@ -0,0 +1,219 @@ +/* + * Summary: implementation of the Relax-NG validation + * Description: implementation of the Relax-NG validation + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_RELAX_NG__ +#define __XML_RELAX_NG__ + +#include <libxml/xmlversion.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlstring.h> +#include <libxml/tree.h> + +#ifdef LIBXML_SCHEMAS_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _xmlRelaxNG xmlRelaxNG; +typedef xmlRelaxNG *xmlRelaxNGPtr; + + +/** + * xmlRelaxNGValidityErrorFunc: + * @ctx: the validation context + * @msg: the message + * @...: extra arguments + * + * Signature of an error callback from a Relax-NG validation + */ +typedef void (*xmlRelaxNGValidityErrorFunc) (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); + +/** + * xmlRelaxNGValidityWarningFunc: + * @ctx: the validation context + * @msg: the message + * @...: extra arguments + * + * Signature of a warning callback from a Relax-NG validation + */ +typedef void (*xmlRelaxNGValidityWarningFunc) (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); + +/** + * A schemas validation context + */ +typedef struct _xmlRelaxNGParserCtxt xmlRelaxNGParserCtxt; +typedef xmlRelaxNGParserCtxt *xmlRelaxNGParserCtxtPtr; + +typedef struct _xmlRelaxNGValidCtxt xmlRelaxNGValidCtxt; +typedef xmlRelaxNGValidCtxt *xmlRelaxNGValidCtxtPtr; + +/* + * xmlRelaxNGValidErr: + * + * List of possible Relax NG validation errors + */ +typedef enum { + XML_RELAXNG_OK = 0, + XML_RELAXNG_ERR_MEMORY, + XML_RELAXNG_ERR_TYPE, + XML_RELAXNG_ERR_TYPEVAL, + XML_RELAXNG_ERR_DUPID, + XML_RELAXNG_ERR_TYPECMP, + XML_RELAXNG_ERR_NOSTATE, + XML_RELAXNG_ERR_NODEFINE, + XML_RELAXNG_ERR_LISTEXTRA, + XML_RELAXNG_ERR_LISTEMPTY, + XML_RELAXNG_ERR_INTERNODATA, + XML_RELAXNG_ERR_INTERSEQ, + XML_RELAXNG_ERR_INTEREXTRA, + XML_RELAXNG_ERR_ELEMNAME, + XML_RELAXNG_ERR_ATTRNAME, + XML_RELAXNG_ERR_ELEMNONS, + XML_RELAXNG_ERR_ATTRNONS, + XML_RELAXNG_ERR_ELEMWRONGNS, + XML_RELAXNG_ERR_ATTRWRONGNS, + XML_RELAXNG_ERR_ELEMEXTRANS, + XML_RELAXNG_ERR_ATTREXTRANS, + XML_RELAXNG_ERR_ELEMNOTEMPTY, + XML_RELAXNG_ERR_NOELEM, + XML_RELAXNG_ERR_NOTELEM, + XML_RELAXNG_ERR_ATTRVALID, + XML_RELAXNG_ERR_CONTENTVALID, + XML_RELAXNG_ERR_EXTRACONTENT, + XML_RELAXNG_ERR_INVALIDATTR, + XML_RELAXNG_ERR_DATAELEM, + XML_RELAXNG_ERR_VALELEM, + XML_RELAXNG_ERR_LISTELEM, + XML_RELAXNG_ERR_DATATYPE, + XML_RELAXNG_ERR_VALUE, + XML_RELAXNG_ERR_LIST, + XML_RELAXNG_ERR_NOGRAMMAR, + XML_RELAXNG_ERR_EXTRADATA, + XML_RELAXNG_ERR_LACKDATA, + XML_RELAXNG_ERR_INTERNAL, + XML_RELAXNG_ERR_ELEMWRONG, + XML_RELAXNG_ERR_TEXTWRONG +} xmlRelaxNGValidErr; + +/* + * xmlRelaxNGParserFlags: + * + * List of possible Relax NG Parser flags + */ +typedef enum { + XML_RELAXNGP_NONE = 0, + XML_RELAXNGP_FREE_DOC = 1, + XML_RELAXNGP_CRNG = 2 +} xmlRelaxNGParserFlag; + +XMLPUBFUN int + xmlRelaxNGInitTypes (void); +XML_DEPRECATED +XMLPUBFUN void + xmlRelaxNGCleanupTypes (void); + +/* + * Interfaces for parsing. + */ +XMLPUBFUN xmlRelaxNGParserCtxtPtr + xmlRelaxNGNewParserCtxt (const char *URL); +XMLPUBFUN xmlRelaxNGParserCtxtPtr + xmlRelaxNGNewMemParserCtxt (const char *buffer, + int size); +XMLPUBFUN xmlRelaxNGParserCtxtPtr + xmlRelaxNGNewDocParserCtxt (xmlDocPtr doc); + +XMLPUBFUN int + xmlRelaxParserSetFlag (xmlRelaxNGParserCtxtPtr ctxt, + int flag); + +XMLPUBFUN void + xmlRelaxNGFreeParserCtxt (xmlRelaxNGParserCtxtPtr ctxt); +XMLPUBFUN void + xmlRelaxNGSetParserErrors(xmlRelaxNGParserCtxtPtr ctxt, + xmlRelaxNGValidityErrorFunc err, + xmlRelaxNGValidityWarningFunc warn, + void *ctx); +XMLPUBFUN int + xmlRelaxNGGetParserErrors(xmlRelaxNGParserCtxtPtr ctxt, + xmlRelaxNGValidityErrorFunc *err, + xmlRelaxNGValidityWarningFunc *warn, + void **ctx); +XMLPUBFUN void + xmlRelaxNGSetParserStructuredErrors( + xmlRelaxNGParserCtxtPtr ctxt, + xmlStructuredErrorFunc serror, + void *ctx); +XMLPUBFUN xmlRelaxNGPtr + xmlRelaxNGParse (xmlRelaxNGParserCtxtPtr ctxt); +XMLPUBFUN void + xmlRelaxNGFree (xmlRelaxNGPtr schema); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlRelaxNGDump (FILE *output, + xmlRelaxNGPtr schema); +XMLPUBFUN void + xmlRelaxNGDumpTree (FILE * output, + xmlRelaxNGPtr schema); +#endif /* LIBXML_OUTPUT_ENABLED */ +/* + * Interfaces for validating + */ +XMLPUBFUN void + xmlRelaxNGSetValidErrors(xmlRelaxNGValidCtxtPtr ctxt, + xmlRelaxNGValidityErrorFunc err, + xmlRelaxNGValidityWarningFunc warn, + void *ctx); +XMLPUBFUN int + xmlRelaxNGGetValidErrors(xmlRelaxNGValidCtxtPtr ctxt, + xmlRelaxNGValidityErrorFunc *err, + xmlRelaxNGValidityWarningFunc *warn, + void **ctx); +XMLPUBFUN void + xmlRelaxNGSetValidStructuredErrors(xmlRelaxNGValidCtxtPtr ctxt, + xmlStructuredErrorFunc serror, void *ctx); +XMLPUBFUN xmlRelaxNGValidCtxtPtr + xmlRelaxNGNewValidCtxt (xmlRelaxNGPtr schema); +XMLPUBFUN void + xmlRelaxNGFreeValidCtxt (xmlRelaxNGValidCtxtPtr ctxt); +XMLPUBFUN int + xmlRelaxNGValidateDoc (xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc); +/* + * Interfaces for progressive validation when possible + */ +XMLPUBFUN int + xmlRelaxNGValidatePushElement (xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +XMLPUBFUN int + xmlRelaxNGValidatePushCData (xmlRelaxNGValidCtxtPtr ctxt, + const xmlChar *data, + int len); +XMLPUBFUN int + xmlRelaxNGValidatePopElement (xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +XMLPUBFUN int + xmlRelaxNGValidateFullElement (xmlRelaxNGValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ + +#endif /* __XML_RELAX_NG__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/schemasInternals.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/schemasInternals.h new file mode 100644 index 00000000..e9d3b3c7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/schemasInternals.h @@ -0,0 +1,959 @@ +/* + * Summary: internal interfaces for XML Schemas + * Description: internal interfaces for the XML Schemas handling + * and schema validity checking + * The Schemas development is a Work In Progress. + * Some of those interfaces are not guaranteed to be API or ABI stable ! + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __XML_SCHEMA_INTERNALS_H__ +#define __XML_SCHEMA_INTERNALS_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <libxml/xmlregexp.h> +#include <libxml/hash.h> +#include <libxml/dict.h> +#include <libxml/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_SCHEMAS_UNKNOWN = 0, + XML_SCHEMAS_STRING = 1, + XML_SCHEMAS_NORMSTRING = 2, + XML_SCHEMAS_DECIMAL = 3, + XML_SCHEMAS_TIME = 4, + XML_SCHEMAS_GDAY = 5, + XML_SCHEMAS_GMONTH = 6, + XML_SCHEMAS_GMONTHDAY = 7, + XML_SCHEMAS_GYEAR = 8, + XML_SCHEMAS_GYEARMONTH = 9, + XML_SCHEMAS_DATE = 10, + XML_SCHEMAS_DATETIME = 11, + XML_SCHEMAS_DURATION = 12, + XML_SCHEMAS_FLOAT = 13, + XML_SCHEMAS_DOUBLE = 14, + XML_SCHEMAS_BOOLEAN = 15, + XML_SCHEMAS_TOKEN = 16, + XML_SCHEMAS_LANGUAGE = 17, + XML_SCHEMAS_NMTOKEN = 18, + XML_SCHEMAS_NMTOKENS = 19, + XML_SCHEMAS_NAME = 20, + XML_SCHEMAS_QNAME = 21, + XML_SCHEMAS_NCNAME = 22, + XML_SCHEMAS_ID = 23, + XML_SCHEMAS_IDREF = 24, + XML_SCHEMAS_IDREFS = 25, + XML_SCHEMAS_ENTITY = 26, + XML_SCHEMAS_ENTITIES = 27, + XML_SCHEMAS_NOTATION = 28, + XML_SCHEMAS_ANYURI = 29, + XML_SCHEMAS_INTEGER = 30, + XML_SCHEMAS_NPINTEGER = 31, + XML_SCHEMAS_NINTEGER = 32, + XML_SCHEMAS_NNINTEGER = 33, + XML_SCHEMAS_PINTEGER = 34, + XML_SCHEMAS_INT = 35, + XML_SCHEMAS_UINT = 36, + XML_SCHEMAS_LONG = 37, + XML_SCHEMAS_ULONG = 38, + XML_SCHEMAS_SHORT = 39, + XML_SCHEMAS_USHORT = 40, + XML_SCHEMAS_BYTE = 41, + XML_SCHEMAS_UBYTE = 42, + XML_SCHEMAS_HEXBINARY = 43, + XML_SCHEMAS_BASE64BINARY = 44, + XML_SCHEMAS_ANYTYPE = 45, + XML_SCHEMAS_ANYSIMPLETYPE = 46 +} xmlSchemaValType; + +/* + * XML Schemas defines multiple type of types. + */ +typedef enum { + XML_SCHEMA_TYPE_BASIC = 1, /* A built-in datatype */ + XML_SCHEMA_TYPE_ANY, + XML_SCHEMA_TYPE_FACET, + XML_SCHEMA_TYPE_SIMPLE, + XML_SCHEMA_TYPE_COMPLEX, + XML_SCHEMA_TYPE_SEQUENCE = 6, + XML_SCHEMA_TYPE_CHOICE, + XML_SCHEMA_TYPE_ALL, + XML_SCHEMA_TYPE_SIMPLE_CONTENT, + XML_SCHEMA_TYPE_COMPLEX_CONTENT, + XML_SCHEMA_TYPE_UR, + XML_SCHEMA_TYPE_RESTRICTION, + XML_SCHEMA_TYPE_EXTENSION, + XML_SCHEMA_TYPE_ELEMENT, + XML_SCHEMA_TYPE_ATTRIBUTE, + XML_SCHEMA_TYPE_ATTRIBUTEGROUP, + XML_SCHEMA_TYPE_GROUP, + XML_SCHEMA_TYPE_NOTATION, + XML_SCHEMA_TYPE_LIST, + XML_SCHEMA_TYPE_UNION, + XML_SCHEMA_TYPE_ANY_ATTRIBUTE, + XML_SCHEMA_TYPE_IDC_UNIQUE, + XML_SCHEMA_TYPE_IDC_KEY, + XML_SCHEMA_TYPE_IDC_KEYREF, + XML_SCHEMA_TYPE_PARTICLE = 25, + XML_SCHEMA_TYPE_ATTRIBUTE_USE, + XML_SCHEMA_FACET_MININCLUSIVE = 1000, + XML_SCHEMA_FACET_MINEXCLUSIVE, + XML_SCHEMA_FACET_MAXINCLUSIVE, + XML_SCHEMA_FACET_MAXEXCLUSIVE, + XML_SCHEMA_FACET_TOTALDIGITS, + XML_SCHEMA_FACET_FRACTIONDIGITS, + XML_SCHEMA_FACET_PATTERN, + XML_SCHEMA_FACET_ENUMERATION, + XML_SCHEMA_FACET_WHITESPACE, + XML_SCHEMA_FACET_LENGTH, + XML_SCHEMA_FACET_MAXLENGTH, + XML_SCHEMA_FACET_MINLENGTH, + XML_SCHEMA_EXTRA_QNAMEREF = 2000, + XML_SCHEMA_EXTRA_ATTR_USE_PROHIB +} xmlSchemaTypeType; + +typedef enum { + XML_SCHEMA_CONTENT_UNKNOWN = 0, + XML_SCHEMA_CONTENT_EMPTY = 1, + XML_SCHEMA_CONTENT_ELEMENTS, + XML_SCHEMA_CONTENT_MIXED, + XML_SCHEMA_CONTENT_SIMPLE, + XML_SCHEMA_CONTENT_MIXED_OR_ELEMENTS, /* Obsolete */ + XML_SCHEMA_CONTENT_BASIC, + XML_SCHEMA_CONTENT_ANY +} xmlSchemaContentType; + +typedef struct _xmlSchemaVal xmlSchemaVal; +typedef xmlSchemaVal *xmlSchemaValPtr; + +typedef struct _xmlSchemaType xmlSchemaType; +typedef xmlSchemaType *xmlSchemaTypePtr; + +typedef struct _xmlSchemaFacet xmlSchemaFacet; +typedef xmlSchemaFacet *xmlSchemaFacetPtr; + +/** + * Annotation + */ +typedef struct _xmlSchemaAnnot xmlSchemaAnnot; +typedef xmlSchemaAnnot *xmlSchemaAnnotPtr; +struct _xmlSchemaAnnot { + struct _xmlSchemaAnnot *next; + xmlNodePtr content; /* the annotation */ +}; + +/** + * XML_SCHEMAS_ANYATTR_SKIP: + * + * Skip unknown attribute from validation + * Obsolete, not used anymore. + */ +#define XML_SCHEMAS_ANYATTR_SKIP 1 +/** + * XML_SCHEMAS_ANYATTR_LAX: + * + * Ignore validation non definition on attributes + * Obsolete, not used anymore. + */ +#define XML_SCHEMAS_ANYATTR_LAX 2 +/** + * XML_SCHEMAS_ANYATTR_STRICT: + * + * Apply strict validation rules on attributes + * Obsolete, not used anymore. + */ +#define XML_SCHEMAS_ANYATTR_STRICT 3 +/** + * XML_SCHEMAS_ANY_SKIP: + * + * Skip unknown attribute from validation + */ +#define XML_SCHEMAS_ANY_SKIP 1 +/** + * XML_SCHEMAS_ANY_LAX: + * + * Used by wildcards. + * Validate if type found, don't worry if not found + */ +#define XML_SCHEMAS_ANY_LAX 2 +/** + * XML_SCHEMAS_ANY_STRICT: + * + * Used by wildcards. + * Apply strict validation rules + */ +#define XML_SCHEMAS_ANY_STRICT 3 +/** + * XML_SCHEMAS_ATTR_USE_PROHIBITED: + * + * Used by wildcards. + * The attribute is prohibited. + */ +#define XML_SCHEMAS_ATTR_USE_PROHIBITED 0 +/** + * XML_SCHEMAS_ATTR_USE_REQUIRED: + * + * The attribute is required. + */ +#define XML_SCHEMAS_ATTR_USE_REQUIRED 1 +/** + * XML_SCHEMAS_ATTR_USE_OPTIONAL: + * + * The attribute is optional. + */ +#define XML_SCHEMAS_ATTR_USE_OPTIONAL 2 +/** + * XML_SCHEMAS_ATTR_GLOBAL: + * + * allow elements in no namespace + */ +#define XML_SCHEMAS_ATTR_GLOBAL 1 << 0 +/** + * XML_SCHEMAS_ATTR_NSDEFAULT: + * + * allow elements in no namespace + */ +#define XML_SCHEMAS_ATTR_NSDEFAULT 1 << 7 +/** + * XML_SCHEMAS_ATTR_INTERNAL_RESOLVED: + * + * this is set when the "type" and "ref" references + * have been resolved. + */ +#define XML_SCHEMAS_ATTR_INTERNAL_RESOLVED 1 << 8 +/** + * XML_SCHEMAS_ATTR_FIXED: + * + * the attribute has a fixed value + */ +#define XML_SCHEMAS_ATTR_FIXED 1 << 9 + +/** + * xmlSchemaAttribute: + * An attribute definition. + */ + +typedef struct _xmlSchemaAttribute xmlSchemaAttribute; +typedef xmlSchemaAttribute *xmlSchemaAttributePtr; +struct _xmlSchemaAttribute { + xmlSchemaTypeType type; + struct _xmlSchemaAttribute *next; /* the next attribute (not used?) */ + const xmlChar *name; /* the name of the declaration */ + const xmlChar *id; /* Deprecated; not used */ + const xmlChar *ref; /* Deprecated; not used */ + const xmlChar *refNs; /* Deprecated; not used */ + const xmlChar *typeName; /* the local name of the type definition */ + const xmlChar *typeNs; /* the ns URI of the type definition */ + xmlSchemaAnnotPtr annot; + + xmlSchemaTypePtr base; /* Deprecated; not used */ + int occurs; /* Deprecated; not used */ + const xmlChar *defValue; /* The initial value of the value constraint */ + xmlSchemaTypePtr subtypes; /* the type definition */ + xmlNodePtr node; + const xmlChar *targetNamespace; + int flags; + const xmlChar *refPrefix; /* Deprecated; not used */ + xmlSchemaValPtr defVal; /* The compiled value constraint */ + xmlSchemaAttributePtr refDecl; /* Deprecated; not used */ +}; + +/** + * xmlSchemaAttributeLink: + * Used to build a list of attribute uses on complexType definitions. + * WARNING: Deprecated; not used. + */ +typedef struct _xmlSchemaAttributeLink xmlSchemaAttributeLink; +typedef xmlSchemaAttributeLink *xmlSchemaAttributeLinkPtr; +struct _xmlSchemaAttributeLink { + struct _xmlSchemaAttributeLink *next;/* the next attribute link ... */ + struct _xmlSchemaAttribute *attr;/* the linked attribute */ +}; + +/** + * XML_SCHEMAS_WILDCARD_COMPLETE: + * + * If the wildcard is complete. + */ +#define XML_SCHEMAS_WILDCARD_COMPLETE 1 << 0 + +/** + * xmlSchemaCharValueLink: + * Used to build a list of namespaces on wildcards. + */ +typedef struct _xmlSchemaWildcardNs xmlSchemaWildcardNs; +typedef xmlSchemaWildcardNs *xmlSchemaWildcardNsPtr; +struct _xmlSchemaWildcardNs { + struct _xmlSchemaWildcardNs *next;/* the next constraint link ... */ + const xmlChar *value;/* the value */ +}; + +/** + * xmlSchemaWildcard. + * A wildcard. + */ +typedef struct _xmlSchemaWildcard xmlSchemaWildcard; +typedef xmlSchemaWildcard *xmlSchemaWildcardPtr; +struct _xmlSchemaWildcard { + xmlSchemaTypeType type; /* The kind of type */ + const xmlChar *id; /* Deprecated; not used */ + xmlSchemaAnnotPtr annot; + xmlNodePtr node; + int minOccurs; /* Deprecated; not used */ + int maxOccurs; /* Deprecated; not used */ + int processContents; + int any; /* Indicates if the ns constraint is of ##any */ + xmlSchemaWildcardNsPtr nsSet; /* The list of allowed namespaces */ + xmlSchemaWildcardNsPtr negNsSet; /* The negated namespace */ + int flags; +}; + +/** + * XML_SCHEMAS_ATTRGROUP_WILDCARD_BUILDED: + * + * The attribute wildcard has been built. + */ +#define XML_SCHEMAS_ATTRGROUP_WILDCARD_BUILDED 1 << 0 +/** + * XML_SCHEMAS_ATTRGROUP_GLOBAL: + * + * The attribute group has been defined. + */ +#define XML_SCHEMAS_ATTRGROUP_GLOBAL 1 << 1 +/** + * XML_SCHEMAS_ATTRGROUP_MARKED: + * + * Marks the attr group as marked; used for circular checks. + */ +#define XML_SCHEMAS_ATTRGROUP_MARKED 1 << 2 + +/** + * XML_SCHEMAS_ATTRGROUP_REDEFINED: + * + * The attr group was redefined. + */ +#define XML_SCHEMAS_ATTRGROUP_REDEFINED 1 << 3 +/** + * XML_SCHEMAS_ATTRGROUP_HAS_REFS: + * + * Whether this attr. group contains attr. group references. + */ +#define XML_SCHEMAS_ATTRGROUP_HAS_REFS 1 << 4 + +/** + * An attribute group definition. + * + * xmlSchemaAttribute and xmlSchemaAttributeGroup start of structures + * must be kept similar + */ +typedef struct _xmlSchemaAttributeGroup xmlSchemaAttributeGroup; +typedef xmlSchemaAttributeGroup *xmlSchemaAttributeGroupPtr; +struct _xmlSchemaAttributeGroup { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaAttribute *next;/* the next attribute if in a group ... */ + const xmlChar *name; + const xmlChar *id; + const xmlChar *ref; /* Deprecated; not used */ + const xmlChar *refNs; /* Deprecated; not used */ + xmlSchemaAnnotPtr annot; + + xmlSchemaAttributePtr attributes; /* Deprecated; not used */ + xmlNodePtr node; + int flags; + xmlSchemaWildcardPtr attributeWildcard; + const xmlChar *refPrefix; /* Deprecated; not used */ + xmlSchemaAttributeGroupPtr refItem; /* Deprecated; not used */ + const xmlChar *targetNamespace; + void *attrUses; +}; + +/** + * xmlSchemaTypeLink: + * Used to build a list of types (e.g. member types of + * simpleType with variety "union"). + */ +typedef struct _xmlSchemaTypeLink xmlSchemaTypeLink; +typedef xmlSchemaTypeLink *xmlSchemaTypeLinkPtr; +struct _xmlSchemaTypeLink { + struct _xmlSchemaTypeLink *next;/* the next type link ... */ + xmlSchemaTypePtr type;/* the linked type */ +}; + +/** + * xmlSchemaFacetLink: + * Used to build a list of facets. + */ +typedef struct _xmlSchemaFacetLink xmlSchemaFacetLink; +typedef xmlSchemaFacetLink *xmlSchemaFacetLinkPtr; +struct _xmlSchemaFacetLink { + struct _xmlSchemaFacetLink *next;/* the next facet link ... */ + xmlSchemaFacetPtr facet;/* the linked facet */ +}; + +/** + * XML_SCHEMAS_TYPE_MIXED: + * + * the element content type is mixed + */ +#define XML_SCHEMAS_TYPE_MIXED 1 << 0 +/** + * XML_SCHEMAS_TYPE_DERIVATION_METHOD_EXTENSION: + * + * the simple or complex type has a derivation method of "extension". + */ +#define XML_SCHEMAS_TYPE_DERIVATION_METHOD_EXTENSION 1 << 1 +/** + * XML_SCHEMAS_TYPE_DERIVATION_METHOD_RESTRICTION: + * + * the simple or complex type has a derivation method of "restriction". + */ +#define XML_SCHEMAS_TYPE_DERIVATION_METHOD_RESTRICTION 1 << 2 +/** + * XML_SCHEMAS_TYPE_GLOBAL: + * + * the type is global + */ +#define XML_SCHEMAS_TYPE_GLOBAL 1 << 3 +/** + * XML_SCHEMAS_TYPE_OWNED_ATTR_WILDCARD: + * + * the complexType owns an attribute wildcard, i.e. + * it can be freed by the complexType + */ +#define XML_SCHEMAS_TYPE_OWNED_ATTR_WILDCARD 1 << 4 /* Obsolete. */ +/** + * XML_SCHEMAS_TYPE_VARIETY_ABSENT: + * + * the simpleType has a variety of "absent". + * TODO: Actually not necessary :-/, since if + * none of the variety flags occur then it's + * automatically absent. + */ +#define XML_SCHEMAS_TYPE_VARIETY_ABSENT 1 << 5 +/** + * XML_SCHEMAS_TYPE_VARIETY_LIST: + * + * the simpleType has a variety of "list". + */ +#define XML_SCHEMAS_TYPE_VARIETY_LIST 1 << 6 +/** + * XML_SCHEMAS_TYPE_VARIETY_UNION: + * + * the simpleType has a variety of "union". + */ +#define XML_SCHEMAS_TYPE_VARIETY_UNION 1 << 7 +/** + * XML_SCHEMAS_TYPE_VARIETY_ATOMIC: + * + * the simpleType has a variety of "union". + */ +#define XML_SCHEMAS_TYPE_VARIETY_ATOMIC 1 << 8 +/** + * XML_SCHEMAS_TYPE_FINAL_EXTENSION: + * + * the complexType has a final of "extension". + */ +#define XML_SCHEMAS_TYPE_FINAL_EXTENSION 1 << 9 +/** + * XML_SCHEMAS_TYPE_FINAL_RESTRICTION: + * + * the simpleType/complexType has a final of "restriction". + */ +#define XML_SCHEMAS_TYPE_FINAL_RESTRICTION 1 << 10 +/** + * XML_SCHEMAS_TYPE_FINAL_LIST: + * + * the simpleType has a final of "list". + */ +#define XML_SCHEMAS_TYPE_FINAL_LIST 1 << 11 +/** + * XML_SCHEMAS_TYPE_FINAL_UNION: + * + * the simpleType has a final of "union". + */ +#define XML_SCHEMAS_TYPE_FINAL_UNION 1 << 12 +/** + * XML_SCHEMAS_TYPE_FINAL_DEFAULT: + * + * the simpleType has a final of "default". + */ +#define XML_SCHEMAS_TYPE_FINAL_DEFAULT 1 << 13 +/** + * XML_SCHEMAS_TYPE_BUILTIN_PRIMITIVE: + * + * Marks the item as a builtin primitive. + */ +#define XML_SCHEMAS_TYPE_BUILTIN_PRIMITIVE 1 << 14 +/** + * XML_SCHEMAS_TYPE_MARKED: + * + * Marks the item as marked; used for circular checks. + */ +#define XML_SCHEMAS_TYPE_MARKED 1 << 16 +/** + * XML_SCHEMAS_TYPE_BLOCK_DEFAULT: + * + * the complexType did not specify 'block' so use the default of the + * <schema> item. + */ +#define XML_SCHEMAS_TYPE_BLOCK_DEFAULT 1 << 17 +/** + * XML_SCHEMAS_TYPE_BLOCK_EXTENSION: + * + * the complexType has a 'block' of "extension". + */ +#define XML_SCHEMAS_TYPE_BLOCK_EXTENSION 1 << 18 +/** + * XML_SCHEMAS_TYPE_BLOCK_RESTRICTION: + * + * the complexType has a 'block' of "restriction". + */ +#define XML_SCHEMAS_TYPE_BLOCK_RESTRICTION 1 << 19 +/** + * XML_SCHEMAS_TYPE_ABSTRACT: + * + * the simple/complexType is abstract. + */ +#define XML_SCHEMAS_TYPE_ABSTRACT 1 << 20 +/** + * XML_SCHEMAS_TYPE_FACETSNEEDVALUE: + * + * indicates if the facets need a computed value + */ +#define XML_SCHEMAS_TYPE_FACETSNEEDVALUE 1 << 21 +/** + * XML_SCHEMAS_TYPE_INTERNAL_RESOLVED: + * + * indicates that the type was typefixed + */ +#define XML_SCHEMAS_TYPE_INTERNAL_RESOLVED 1 << 22 +/** + * XML_SCHEMAS_TYPE_INTERNAL_INVALID: + * + * indicates that the type is invalid + */ +#define XML_SCHEMAS_TYPE_INTERNAL_INVALID 1 << 23 +/** + * XML_SCHEMAS_TYPE_WHITESPACE_PRESERVE: + * + * a whitespace-facet value of "preserve" + */ +#define XML_SCHEMAS_TYPE_WHITESPACE_PRESERVE 1 << 24 +/** + * XML_SCHEMAS_TYPE_WHITESPACE_REPLACE: + * + * a whitespace-facet value of "replace" + */ +#define XML_SCHEMAS_TYPE_WHITESPACE_REPLACE 1 << 25 +/** + * XML_SCHEMAS_TYPE_WHITESPACE_COLLAPSE: + * + * a whitespace-facet value of "collapse" + */ +#define XML_SCHEMAS_TYPE_WHITESPACE_COLLAPSE 1 << 26 +/** + * XML_SCHEMAS_TYPE_HAS_FACETS: + * + * has facets + */ +#define XML_SCHEMAS_TYPE_HAS_FACETS 1 << 27 +/** + * XML_SCHEMAS_TYPE_NORMVALUENEEDED: + * + * indicates if the facets (pattern) need a normalized value + */ +#define XML_SCHEMAS_TYPE_NORMVALUENEEDED 1 << 28 + +/** + * XML_SCHEMAS_TYPE_FIXUP_1: + * + * First stage of fixup was done. + */ +#define XML_SCHEMAS_TYPE_FIXUP_1 1 << 29 + +/** + * XML_SCHEMAS_TYPE_REDEFINED: + * + * The type was redefined. + */ +#define XML_SCHEMAS_TYPE_REDEFINED 1 << 30 +/** + * XML_SCHEMAS_TYPE_REDEFINING: + * + * The type redefines an other type. + */ +/* #define XML_SCHEMAS_TYPE_REDEFINING 1 << 31 */ + +/** + * _xmlSchemaType: + * + * Schemas type definition. + */ +struct _xmlSchemaType { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaType *next; /* the next type if in a sequence ... */ + const xmlChar *name; + const xmlChar *id ; /* Deprecated; not used */ + const xmlChar *ref; /* Deprecated; not used */ + const xmlChar *refNs; /* Deprecated; not used */ + xmlSchemaAnnotPtr annot; + xmlSchemaTypePtr subtypes; + xmlSchemaAttributePtr attributes; /* Deprecated; not used */ + xmlNodePtr node; + int minOccurs; /* Deprecated; not used */ + int maxOccurs; /* Deprecated; not used */ + + int flags; + xmlSchemaContentType contentType; + const xmlChar *base; /* Base type's local name */ + const xmlChar *baseNs; /* Base type's target namespace */ + xmlSchemaTypePtr baseType; /* The base type component */ + xmlSchemaFacetPtr facets; /* Local facets */ + struct _xmlSchemaType *redef; /* Deprecated; not used */ + int recurse; /* Obsolete */ + xmlSchemaAttributeLinkPtr *attributeUses; /* Deprecated; not used */ + xmlSchemaWildcardPtr attributeWildcard; + int builtInType; /* Type of built-in types. */ + xmlSchemaTypeLinkPtr memberTypes; /* member-types if a union type. */ + xmlSchemaFacetLinkPtr facetSet; /* All facets (incl. inherited) */ + const xmlChar *refPrefix; /* Deprecated; not used */ + xmlSchemaTypePtr contentTypeDef; /* Used for the simple content of complex types. + Could we use @subtypes for this? */ + xmlRegexpPtr contModel; /* Holds the automaton of the content model */ + const xmlChar *targetNamespace; + void *attrUses; +}; + +/* + * xmlSchemaElement: + * An element definition. + * + * xmlSchemaType, xmlSchemaFacet and xmlSchemaElement start of + * structures must be kept similar + */ +/** + * XML_SCHEMAS_ELEM_NILLABLE: + * + * the element is nillable + */ +#define XML_SCHEMAS_ELEM_NILLABLE 1 << 0 +/** + * XML_SCHEMAS_ELEM_GLOBAL: + * + * the element is global + */ +#define XML_SCHEMAS_ELEM_GLOBAL 1 << 1 +/** + * XML_SCHEMAS_ELEM_DEFAULT: + * + * the element has a default value + */ +#define XML_SCHEMAS_ELEM_DEFAULT 1 << 2 +/** + * XML_SCHEMAS_ELEM_FIXED: + * + * the element has a fixed value + */ +#define XML_SCHEMAS_ELEM_FIXED 1 << 3 +/** + * XML_SCHEMAS_ELEM_ABSTRACT: + * + * the element is abstract + */ +#define XML_SCHEMAS_ELEM_ABSTRACT 1 << 4 +/** + * XML_SCHEMAS_ELEM_TOPLEVEL: + * + * the element is top level + * obsolete: use XML_SCHEMAS_ELEM_GLOBAL instead + */ +#define XML_SCHEMAS_ELEM_TOPLEVEL 1 << 5 +/** + * XML_SCHEMAS_ELEM_REF: + * + * the element is a reference to a type + */ +#define XML_SCHEMAS_ELEM_REF 1 << 6 +/** + * XML_SCHEMAS_ELEM_NSDEFAULT: + * + * allow elements in no namespace + * Obsolete, not used anymore. + */ +#define XML_SCHEMAS_ELEM_NSDEFAULT 1 << 7 +/** + * XML_SCHEMAS_ELEM_INTERNAL_RESOLVED: + * + * this is set when "type", "ref", "substitutionGroup" + * references have been resolved. + */ +#define XML_SCHEMAS_ELEM_INTERNAL_RESOLVED 1 << 8 + /** + * XML_SCHEMAS_ELEM_CIRCULAR: + * + * a helper flag for the search of circular references. + */ +#define XML_SCHEMAS_ELEM_CIRCULAR 1 << 9 +/** + * XML_SCHEMAS_ELEM_BLOCK_ABSENT: + * + * the "block" attribute is absent + */ +#define XML_SCHEMAS_ELEM_BLOCK_ABSENT 1 << 10 +/** + * XML_SCHEMAS_ELEM_BLOCK_EXTENSION: + * + * disallowed substitutions are absent + */ +#define XML_SCHEMAS_ELEM_BLOCK_EXTENSION 1 << 11 +/** + * XML_SCHEMAS_ELEM_BLOCK_RESTRICTION: + * + * disallowed substitutions: "restriction" + */ +#define XML_SCHEMAS_ELEM_BLOCK_RESTRICTION 1 << 12 +/** + * XML_SCHEMAS_ELEM_BLOCK_SUBSTITUTION: + * + * disallowed substitutions: "substitution" + */ +#define XML_SCHEMAS_ELEM_BLOCK_SUBSTITUTION 1 << 13 +/** + * XML_SCHEMAS_ELEM_FINAL_ABSENT: + * + * substitution group exclusions are absent + */ +#define XML_SCHEMAS_ELEM_FINAL_ABSENT 1 << 14 +/** + * XML_SCHEMAS_ELEM_FINAL_EXTENSION: + * + * substitution group exclusions: "extension" + */ +#define XML_SCHEMAS_ELEM_FINAL_EXTENSION 1 << 15 +/** + * XML_SCHEMAS_ELEM_FINAL_RESTRICTION: + * + * substitution group exclusions: "restriction" + */ +#define XML_SCHEMAS_ELEM_FINAL_RESTRICTION 1 << 16 +/** + * XML_SCHEMAS_ELEM_SUBST_GROUP_HEAD: + * + * the declaration is a substitution group head + */ +#define XML_SCHEMAS_ELEM_SUBST_GROUP_HEAD 1 << 17 +/** + * XML_SCHEMAS_ELEM_INTERNAL_CHECKED: + * + * this is set when the elem decl has been checked against + * all constraints + */ +#define XML_SCHEMAS_ELEM_INTERNAL_CHECKED 1 << 18 + +typedef struct _xmlSchemaElement xmlSchemaElement; +typedef xmlSchemaElement *xmlSchemaElementPtr; +struct _xmlSchemaElement { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaType *next; /* Not used? */ + const xmlChar *name; + const xmlChar *id; /* Deprecated; not used */ + const xmlChar *ref; /* Deprecated; not used */ + const xmlChar *refNs; /* Deprecated; not used */ + xmlSchemaAnnotPtr annot; + xmlSchemaTypePtr subtypes; /* the type definition */ + xmlSchemaAttributePtr attributes; + xmlNodePtr node; + int minOccurs; /* Deprecated; not used */ + int maxOccurs; /* Deprecated; not used */ + + int flags; + const xmlChar *targetNamespace; + const xmlChar *namedType; + const xmlChar *namedTypeNs; + const xmlChar *substGroup; + const xmlChar *substGroupNs; + const xmlChar *scope; + const xmlChar *value; /* The original value of the value constraint. */ + struct _xmlSchemaElement *refDecl; /* This will now be used for the + substitution group affiliation */ + xmlRegexpPtr contModel; /* Obsolete for WXS, maybe used for RelaxNG */ + xmlSchemaContentType contentType; + const xmlChar *refPrefix; /* Deprecated; not used */ + xmlSchemaValPtr defVal; /* The compiled value constraint. */ + void *idcs; /* The identity-constraint defs */ +}; + +/* + * XML_SCHEMAS_FACET_UNKNOWN: + * + * unknown facet handling + */ +#define XML_SCHEMAS_FACET_UNKNOWN 0 +/* + * XML_SCHEMAS_FACET_PRESERVE: + * + * preserve the type of the facet + */ +#define XML_SCHEMAS_FACET_PRESERVE 1 +/* + * XML_SCHEMAS_FACET_REPLACE: + * + * replace the type of the facet + */ +#define XML_SCHEMAS_FACET_REPLACE 2 +/* + * XML_SCHEMAS_FACET_COLLAPSE: + * + * collapse the types of the facet + */ +#define XML_SCHEMAS_FACET_COLLAPSE 3 +/** + * A facet definition. + */ +struct _xmlSchemaFacet { + xmlSchemaTypeType type; /* The kind of type */ + struct _xmlSchemaFacet *next;/* the next type if in a sequence ... */ + const xmlChar *value; /* The original value */ + const xmlChar *id; /* Obsolete */ + xmlSchemaAnnotPtr annot; + xmlNodePtr node; + int fixed; /* XML_SCHEMAS_FACET_PRESERVE, etc. */ + int whitespace; + xmlSchemaValPtr val; /* The compiled value */ + xmlRegexpPtr regexp; /* The regex for patterns */ +}; + +/** + * A notation definition. + */ +typedef struct _xmlSchemaNotation xmlSchemaNotation; +typedef xmlSchemaNotation *xmlSchemaNotationPtr; +struct _xmlSchemaNotation { + xmlSchemaTypeType type; /* The kind of type */ + const xmlChar *name; + xmlSchemaAnnotPtr annot; + const xmlChar *identifier; + const xmlChar *targetNamespace; +}; + +/* +* TODO: Actually all those flags used for the schema should sit +* on the schema parser context, since they are used only +* during parsing an XML schema document, and not available +* on the component level as per spec. +*/ +/** + * XML_SCHEMAS_QUALIF_ELEM: + * + * Reflects elementFormDefault == qualified in + * an XML schema document. + */ +#define XML_SCHEMAS_QUALIF_ELEM 1 << 0 +/** + * XML_SCHEMAS_QUALIF_ATTR: + * + * Reflects attributeFormDefault == qualified in + * an XML schema document. + */ +#define XML_SCHEMAS_QUALIF_ATTR 1 << 1 +/** + * XML_SCHEMAS_FINAL_DEFAULT_EXTENSION: + * + * the schema has "extension" in the set of finalDefault. + */ +#define XML_SCHEMAS_FINAL_DEFAULT_EXTENSION 1 << 2 +/** + * XML_SCHEMAS_FINAL_DEFAULT_RESTRICTION: + * + * the schema has "restriction" in the set of finalDefault. + */ +#define XML_SCHEMAS_FINAL_DEFAULT_RESTRICTION 1 << 3 +/** + * XML_SCHEMAS_FINAL_DEFAULT_LIST: + * + * the schema has "list" in the set of finalDefault. + */ +#define XML_SCHEMAS_FINAL_DEFAULT_LIST 1 << 4 +/** + * XML_SCHEMAS_FINAL_DEFAULT_UNION: + * + * the schema has "union" in the set of finalDefault. + */ +#define XML_SCHEMAS_FINAL_DEFAULT_UNION 1 << 5 +/** + * XML_SCHEMAS_BLOCK_DEFAULT_EXTENSION: + * + * the schema has "extension" in the set of blockDefault. + */ +#define XML_SCHEMAS_BLOCK_DEFAULT_EXTENSION 1 << 6 +/** + * XML_SCHEMAS_BLOCK_DEFAULT_RESTRICTION: + * + * the schema has "restriction" in the set of blockDefault. + */ +#define XML_SCHEMAS_BLOCK_DEFAULT_RESTRICTION 1 << 7 +/** + * XML_SCHEMAS_BLOCK_DEFAULT_SUBSTITUTION: + * + * the schema has "substitution" in the set of blockDefault. + */ +#define XML_SCHEMAS_BLOCK_DEFAULT_SUBSTITUTION 1 << 8 +/** + * XML_SCHEMAS_INCLUDING_CONVERT_NS: + * + * the schema is currently including an other schema with + * no target namespace. + */ +#define XML_SCHEMAS_INCLUDING_CONVERT_NS 1 << 9 +/** + * _xmlSchema: + * + * A Schemas definition + */ +struct _xmlSchema { + const xmlChar *name; /* schema name */ + const xmlChar *targetNamespace; /* the target namespace */ + const xmlChar *version; + const xmlChar *id; /* Obsolete */ + xmlDocPtr doc; + xmlSchemaAnnotPtr annot; + int flags; + + xmlHashTablePtr typeDecl; + xmlHashTablePtr attrDecl; + xmlHashTablePtr attrgrpDecl; + xmlHashTablePtr elemDecl; + xmlHashTablePtr notaDecl; + + xmlHashTablePtr schemasImports; + + void *_private; /* unused by the library for users or bindings */ + xmlHashTablePtr groupDecl; + xmlDictPtr dict; + void *includes; /* the includes, this is opaque for now */ + int preserve; /* whether to free the document */ + int counter; /* used to give anonymous components unique names */ + xmlHashTablePtr idcDef; /* All identity-constraint defs. */ + void *volatiles; /* Obsolete */ +}; + +XMLPUBFUN void xmlSchemaFreeType (xmlSchemaTypePtr type); +XMLPUBFUN void xmlSchemaFreeWildcard(xmlSchemaWildcardPtr wildcard); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_INTERNALS_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/schematron.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/schematron.h new file mode 100644 index 00000000..8dd8d25c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/schematron.h @@ -0,0 +1,143 @@ +/* + * Summary: XML Schematron implementation + * Description: interface to the XML Schematron validity checking. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __XML_SCHEMATRON_H__ +#define __XML_SCHEMATRON_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_SCHEMATRON_ENABLED + +#include <libxml/xmlerror.h> +#include <libxml/tree.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_SCHEMATRON_OUT_QUIET = 1 << 0, /* quiet no report */ + XML_SCHEMATRON_OUT_TEXT = 1 << 1, /* build a textual report */ + XML_SCHEMATRON_OUT_XML = 1 << 2, /* output SVRL */ + XML_SCHEMATRON_OUT_ERROR = 1 << 3, /* output via xmlStructuredErrorFunc */ + XML_SCHEMATRON_OUT_FILE = 1 << 8, /* output to a file descriptor */ + XML_SCHEMATRON_OUT_BUFFER = 1 << 9, /* output to a buffer */ + XML_SCHEMATRON_OUT_IO = 1 << 10 /* output to I/O mechanism */ +} xmlSchematronValidOptions; + +/** + * The schemas related types are kept internal + */ +typedef struct _xmlSchematron xmlSchematron; +typedef xmlSchematron *xmlSchematronPtr; + +/** + * xmlSchematronValidityErrorFunc: + * @ctx: the validation context + * @msg: the message + * @...: extra arguments + * + * Signature of an error callback from a Schematron validation + */ +typedef void (*xmlSchematronValidityErrorFunc) (void *ctx, const char *msg, ...); + +/** + * xmlSchematronValidityWarningFunc: + * @ctx: the validation context + * @msg: the message + * @...: extra arguments + * + * Signature of a warning callback from a Schematron validation + */ +typedef void (*xmlSchematronValidityWarningFunc) (void *ctx, const char *msg, ...); + +/** + * A schemas validation context + */ +typedef struct _xmlSchematronParserCtxt xmlSchematronParserCtxt; +typedef xmlSchematronParserCtxt *xmlSchematronParserCtxtPtr; + +typedef struct _xmlSchematronValidCtxt xmlSchematronValidCtxt; +typedef xmlSchematronValidCtxt *xmlSchematronValidCtxtPtr; + +/* + * Interfaces for parsing. + */ +XMLPUBFUN xmlSchematronParserCtxtPtr + xmlSchematronNewParserCtxt (const char *URL); +XMLPUBFUN xmlSchematronParserCtxtPtr + xmlSchematronNewMemParserCtxt(const char *buffer, + int size); +XMLPUBFUN xmlSchematronParserCtxtPtr + xmlSchematronNewDocParserCtxt(xmlDocPtr doc); +XMLPUBFUN void + xmlSchematronFreeParserCtxt (xmlSchematronParserCtxtPtr ctxt); +/***** +XMLPUBFUN void + xmlSchematronSetParserErrors(xmlSchematronParserCtxtPtr ctxt, + xmlSchematronValidityErrorFunc err, + xmlSchematronValidityWarningFunc warn, + void *ctx); +XMLPUBFUN int + xmlSchematronGetParserErrors(xmlSchematronParserCtxtPtr ctxt, + xmlSchematronValidityErrorFunc * err, + xmlSchematronValidityWarningFunc * warn, + void **ctx); +XMLPUBFUN int + xmlSchematronIsValid (xmlSchematronValidCtxtPtr ctxt); + *****/ +XMLPUBFUN xmlSchematronPtr + xmlSchematronParse (xmlSchematronParserCtxtPtr ctxt); +XMLPUBFUN void + xmlSchematronFree (xmlSchematronPtr schema); +/* + * Interfaces for validating + */ +XMLPUBFUN void + xmlSchematronSetValidStructuredErrors( + xmlSchematronValidCtxtPtr ctxt, + xmlStructuredErrorFunc serror, + void *ctx); +/****** +XMLPUBFUN void + xmlSchematronSetValidErrors (xmlSchematronValidCtxtPtr ctxt, + xmlSchematronValidityErrorFunc err, + xmlSchematronValidityWarningFunc warn, + void *ctx); +XMLPUBFUN int + xmlSchematronGetValidErrors (xmlSchematronValidCtxtPtr ctxt, + xmlSchematronValidityErrorFunc *err, + xmlSchematronValidityWarningFunc *warn, + void **ctx); +XMLPUBFUN int + xmlSchematronSetValidOptions(xmlSchematronValidCtxtPtr ctxt, + int options); +XMLPUBFUN int + xmlSchematronValidCtxtGetOptions(xmlSchematronValidCtxtPtr ctxt); +XMLPUBFUN int + xmlSchematronValidateOneElement (xmlSchematronValidCtxtPtr ctxt, + xmlNodePtr elem); + *******/ + +XMLPUBFUN xmlSchematronValidCtxtPtr + xmlSchematronNewValidCtxt (xmlSchematronPtr schema, + int options); +XMLPUBFUN void + xmlSchematronFreeValidCtxt (xmlSchematronValidCtxtPtr ctxt); +XMLPUBFUN int + xmlSchematronValidateDoc (xmlSchematronValidCtxtPtr ctxt, + xmlDocPtr instance); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMATRON_ENABLED */ +#endif /* __XML_SCHEMATRON_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/threads.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/threads.h new file mode 100644 index 00000000..8f4b6e17 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/threads.h @@ -0,0 +1,87 @@ +/** + * Summary: interfaces for thread handling + * Description: set of generic threading related routines + * should work with pthreads, Windows native or TLS threads + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_THREADS_H__ +#define __XML_THREADS_H__ + +#include <libxml/xmlversion.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * xmlMutex are a simple mutual exception locks. + */ +typedef struct _xmlMutex xmlMutex; +typedef xmlMutex *xmlMutexPtr; + +/* + * xmlRMutex are reentrant mutual exception locks. + */ +typedef struct _xmlRMutex xmlRMutex; +typedef xmlRMutex *xmlRMutexPtr; + +XMLPUBFUN int + xmlCheckThreadLocalStorage(void); + +XMLPUBFUN xmlMutexPtr + xmlNewMutex (void); +XMLPUBFUN void + xmlMutexLock (xmlMutexPtr tok); +XMLPUBFUN void + xmlMutexUnlock (xmlMutexPtr tok); +XMLPUBFUN void + xmlFreeMutex (xmlMutexPtr tok); + +XMLPUBFUN xmlRMutexPtr + xmlNewRMutex (void); +XMLPUBFUN void + xmlRMutexLock (xmlRMutexPtr tok); +XMLPUBFUN void + xmlRMutexUnlock (xmlRMutexPtr tok); +XMLPUBFUN void + xmlFreeRMutex (xmlRMutexPtr tok); + +/* + * Library wide APIs. + */ +XML_DEPRECATED +XMLPUBFUN void + xmlInitThreads (void); +XMLPUBFUN void + xmlLockLibrary (void); +XMLPUBFUN void + xmlUnlockLibrary(void); +XML_DEPRECATED +XMLPUBFUN int + xmlGetThreadId (void); +XML_DEPRECATED +XMLPUBFUN int + xmlIsMainThread (void); +XML_DEPRECATED +XMLPUBFUN void + xmlCleanupThreads(void); + +/** DOC_DISABLE */ +#if defined(LIBXML_THREAD_ENABLED) && defined(_WIN32) && \ + defined(LIBXML_STATIC_FOR_DLL) +int +xmlDllMain(void *hinstDLL, unsigned long fdwReason, + void *lpvReserved); +#endif +/** DOC_ENABLE */ + +#ifdef __cplusplus +} +#endif + + +#endif /* __XML_THREADS_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/tree.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/tree.h new file mode 100644 index 00000000..a90a174f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/tree.h @@ -0,0 +1,1362 @@ +/* + * Summary: interfaces for tree manipulation + * Description: this module describes the structures found in an tree resulting + * from an XML or HTML parsing, as well as the API provided for + * various processing on that tree + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef XML_TREE_INTERNALS + +/* + * Emulate circular dependency for backward compatibility + */ +#include <libxml/parser.h> + +#else /* XML_TREE_INTERNALS */ + +#ifndef __XML_TREE_H__ +#define __XML_TREE_H__ + +#include <stdio.h> +#include <limits.h> +#include <libxml/xmlversion.h> +#include <libxml/xmlstring.h> +#include <libxml/xmlmemory.h> +#include <libxml/xmlregexp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Some of the basic types pointer to structures: + */ +/* xmlIO.h */ +typedef struct _xmlParserInputBuffer xmlParserInputBuffer; +typedef xmlParserInputBuffer *xmlParserInputBufferPtr; + +typedef struct _xmlOutputBuffer xmlOutputBuffer; +typedef xmlOutputBuffer *xmlOutputBufferPtr; + +/* parser.h */ +typedef struct _xmlParserInput xmlParserInput; +typedef xmlParserInput *xmlParserInputPtr; + +typedef struct _xmlParserCtxt xmlParserCtxt; +typedef xmlParserCtxt *xmlParserCtxtPtr; + +typedef struct _xmlSAXLocator xmlSAXLocator; +typedef xmlSAXLocator *xmlSAXLocatorPtr; + +typedef struct _xmlSAXHandler xmlSAXHandler; +typedef xmlSAXHandler *xmlSAXHandlerPtr; + +/* entities.h */ +typedef struct _xmlEntity xmlEntity; +typedef xmlEntity *xmlEntityPtr; + +/** + * BASE_BUFFER_SIZE: + * + * default buffer size 4000. + */ +#define BASE_BUFFER_SIZE 4096 + +/** + * LIBXML_NAMESPACE_DICT: + * + * Defines experimental behaviour: + * 1) xmlNs gets an additional field @context (a xmlDoc) + * 2) when creating a tree, xmlNs->href is stored in the dict of xmlDoc. + */ +/* #define LIBXML_NAMESPACE_DICT */ + +/** + * xmlBufferAllocationScheme: + * + * A buffer allocation scheme can be defined to either match exactly the + * need or double it's allocated size each time it is found too small. + */ + +typedef enum { + XML_BUFFER_ALLOC_DOUBLEIT, /* double each time one need to grow */ + XML_BUFFER_ALLOC_EXACT, /* grow only to the minimal size */ + XML_BUFFER_ALLOC_IMMUTABLE, /* immutable buffer, deprecated */ + XML_BUFFER_ALLOC_IO, /* special allocation scheme used for I/O */ + XML_BUFFER_ALLOC_HYBRID, /* exact up to a threshold, and doubleit thereafter */ + XML_BUFFER_ALLOC_BOUNDED /* limit the upper size of the buffer */ +} xmlBufferAllocationScheme; + +/** + * xmlBuffer: + * + * A buffer structure, this old construct is limited to 2GB and + * is being deprecated, use API with xmlBuf instead + */ +typedef struct _xmlBuffer xmlBuffer; +typedef xmlBuffer *xmlBufferPtr; +struct _xmlBuffer { + xmlChar *content; /* The buffer content UTF8 */ + unsigned int use; /* The buffer size used */ + unsigned int size; /* The buffer size */ + xmlBufferAllocationScheme alloc; /* The realloc method */ + xmlChar *contentIO; /* in IO mode we may have a different base */ +}; + +/** + * xmlBuf: + * + * A buffer structure, new one, the actual structure internals are not public + */ + +typedef struct _xmlBuf xmlBuf; + +/** + * xmlBufPtr: + * + * A pointer to a buffer structure, the actual structure internals are not + * public + */ + +typedef xmlBuf *xmlBufPtr; + +/* + * A few public routines for xmlBuf. As those are expected to be used + * mostly internally the bulk of the routines are internal in buf.h + */ +XMLPUBFUN xmlChar* xmlBufContent (const xmlBuf* buf); +XMLPUBFUN xmlChar* xmlBufEnd (xmlBufPtr buf); +XMLPUBFUN size_t xmlBufUse (const xmlBufPtr buf); +XMLPUBFUN size_t xmlBufShrink (xmlBufPtr buf, size_t len); + +/* + * LIBXML2_NEW_BUFFER: + * + * Macro used to express that the API use the new buffers for + * xmlParserInputBuffer and xmlOutputBuffer. The change was + * introduced in 2.9.0. + */ +#define LIBXML2_NEW_BUFFER + +/** + * XML_XML_NAMESPACE: + * + * This is the namespace for the special xml: prefix predefined in the + * XML Namespace specification. + */ +#define XML_XML_NAMESPACE \ + (const xmlChar *) "http://www.w3.org/XML/1998/namespace" + +/** + * XML_XML_ID: + * + * This is the name for the special xml:id attribute + */ +#define XML_XML_ID (const xmlChar *) "xml:id" + +/* + * The different element types carried by an XML tree. + * + * NOTE: This is synchronized with DOM Level1 values + * See http://www.w3.org/TR/REC-DOM-Level-1/ + * + * Actually this had diverged a bit, and now XML_DOCUMENT_TYPE_NODE should + * be deprecated to use an XML_DTD_NODE. + */ +typedef enum { + XML_ELEMENT_NODE= 1, + XML_ATTRIBUTE_NODE= 2, + XML_TEXT_NODE= 3, + XML_CDATA_SECTION_NODE= 4, + XML_ENTITY_REF_NODE= 5, + XML_ENTITY_NODE= 6, + XML_PI_NODE= 7, + XML_COMMENT_NODE= 8, + XML_DOCUMENT_NODE= 9, + XML_DOCUMENT_TYPE_NODE= 10, + XML_DOCUMENT_FRAG_NODE= 11, + XML_NOTATION_NODE= 12, + XML_HTML_DOCUMENT_NODE= 13, + XML_DTD_NODE= 14, + XML_ELEMENT_DECL= 15, + XML_ATTRIBUTE_DECL= 16, + XML_ENTITY_DECL= 17, + XML_NAMESPACE_DECL= 18, + XML_XINCLUDE_START= 19, + XML_XINCLUDE_END= 20 + /* XML_DOCB_DOCUMENT_NODE= 21 */ /* removed */ +} xmlElementType; + +/** DOC_DISABLE */ +/* For backward compatibility */ +#define XML_DOCB_DOCUMENT_NODE 21 +/** DOC_ENABLE */ + +/** + * xmlNotation: + * + * A DTD Notation definition. + */ + +typedef struct _xmlNotation xmlNotation; +typedef xmlNotation *xmlNotationPtr; +struct _xmlNotation { + const xmlChar *name; /* Notation name */ + const xmlChar *PublicID; /* Public identifier, if any */ + const xmlChar *SystemID; /* System identifier, if any */ +}; + +/** + * xmlAttributeType: + * + * A DTD Attribute type definition. + */ + +typedef enum { + XML_ATTRIBUTE_CDATA = 1, + XML_ATTRIBUTE_ID, + XML_ATTRIBUTE_IDREF , + XML_ATTRIBUTE_IDREFS, + XML_ATTRIBUTE_ENTITY, + XML_ATTRIBUTE_ENTITIES, + XML_ATTRIBUTE_NMTOKEN, + XML_ATTRIBUTE_NMTOKENS, + XML_ATTRIBUTE_ENUMERATION, + XML_ATTRIBUTE_NOTATION +} xmlAttributeType; + +/** + * xmlAttributeDefault: + * + * A DTD Attribute default definition. + */ + +typedef enum { + XML_ATTRIBUTE_NONE = 1, + XML_ATTRIBUTE_REQUIRED, + XML_ATTRIBUTE_IMPLIED, + XML_ATTRIBUTE_FIXED +} xmlAttributeDefault; + +/** + * xmlEnumeration: + * + * List structure used when there is an enumeration in DTDs. + */ + +typedef struct _xmlEnumeration xmlEnumeration; +typedef xmlEnumeration *xmlEnumerationPtr; +struct _xmlEnumeration { + struct _xmlEnumeration *next; /* next one */ + const xmlChar *name; /* Enumeration name */ +}; + +/** + * xmlAttribute: + * + * An Attribute declaration in a DTD. + */ + +typedef struct _xmlAttribute xmlAttribute; +typedef xmlAttribute *xmlAttributePtr; +struct _xmlAttribute { + void *_private; /* application data */ + xmlElementType type; /* XML_ATTRIBUTE_DECL, must be second ! */ + const xmlChar *name; /* Attribute name */ + struct _xmlNode *children; /* NULL */ + struct _xmlNode *last; /* NULL */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + struct _xmlAttribute *nexth; /* next in hash table */ + xmlAttributeType atype; /* The attribute type */ + xmlAttributeDefault def; /* the default */ + const xmlChar *defaultValue; /* or the default value */ + xmlEnumerationPtr tree; /* or the enumeration tree if any */ + const xmlChar *prefix; /* the namespace prefix if any */ + const xmlChar *elem; /* Element holding the attribute */ +}; + +/** + * xmlElementContentType: + * + * Possible definitions of element content types. + */ +typedef enum { + XML_ELEMENT_CONTENT_PCDATA = 1, + XML_ELEMENT_CONTENT_ELEMENT, + XML_ELEMENT_CONTENT_SEQ, + XML_ELEMENT_CONTENT_OR +} xmlElementContentType; + +/** + * xmlElementContentOccur: + * + * Possible definitions of element content occurrences. + */ +typedef enum { + XML_ELEMENT_CONTENT_ONCE = 1, + XML_ELEMENT_CONTENT_OPT, + XML_ELEMENT_CONTENT_MULT, + XML_ELEMENT_CONTENT_PLUS +} xmlElementContentOccur; + +/** + * xmlElementContent: + * + * An XML Element content as stored after parsing an element definition + * in a DTD. + */ + +typedef struct _xmlElementContent xmlElementContent; +typedef xmlElementContent *xmlElementContentPtr; +struct _xmlElementContent { + xmlElementContentType type; /* PCDATA, ELEMENT, SEQ or OR */ + xmlElementContentOccur ocur; /* ONCE, OPT, MULT or PLUS */ + const xmlChar *name; /* Element name */ + struct _xmlElementContent *c1; /* first child */ + struct _xmlElementContent *c2; /* second child */ + struct _xmlElementContent *parent; /* parent */ + const xmlChar *prefix; /* Namespace prefix */ +}; + +/** + * xmlElementTypeVal: + * + * The different possibilities for an element content type. + */ + +typedef enum { + XML_ELEMENT_TYPE_UNDEFINED = 0, + XML_ELEMENT_TYPE_EMPTY = 1, + XML_ELEMENT_TYPE_ANY, + XML_ELEMENT_TYPE_MIXED, + XML_ELEMENT_TYPE_ELEMENT +} xmlElementTypeVal; + +/** + * xmlElement: + * + * An XML Element declaration from a DTD. + */ + +typedef struct _xmlElement xmlElement; +typedef xmlElement *xmlElementPtr; +struct _xmlElement { + void *_private; /* application data */ + xmlElementType type; /* XML_ELEMENT_DECL, must be second ! */ + const xmlChar *name; /* Element name */ + struct _xmlNode *children; /* NULL */ + struct _xmlNode *last; /* NULL */ + struct _xmlDtd *parent; /* -> DTD */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + xmlElementTypeVal etype; /* The type */ + xmlElementContentPtr content; /* the allowed element content */ + xmlAttributePtr attributes; /* List of the declared attributes */ + const xmlChar *prefix; /* the namespace prefix if any */ +#ifdef LIBXML_REGEXP_ENABLED + xmlRegexpPtr contModel; /* the validating regexp */ +#else + void *contModel; +#endif +}; + + +/** + * XML_LOCAL_NAMESPACE: + * + * A namespace declaration node. + */ +#define XML_LOCAL_NAMESPACE XML_NAMESPACE_DECL +typedef xmlElementType xmlNsType; + +/** + * xmlNs: + * + * An XML namespace. + * Note that prefix == NULL is valid, it defines the default namespace + * within the subtree (until overridden). + * + * xmlNsType is unified with xmlElementType. + */ + +typedef struct _xmlNs xmlNs; +typedef xmlNs *xmlNsPtr; +struct _xmlNs { + struct _xmlNs *next; /* next Ns link for this node */ + xmlNsType type; /* global or local */ + const xmlChar *href; /* URL for the namespace */ + const xmlChar *prefix; /* prefix for the namespace */ + void *_private; /* application data */ + struct _xmlDoc *context; /* normally an xmlDoc */ +}; + +/** + * xmlDtd: + * + * An XML DTD, as defined by <!DOCTYPE ... There is actually one for + * the internal subset and for the external subset. + */ +typedef struct _xmlDtd xmlDtd; +typedef xmlDtd *xmlDtdPtr; +struct _xmlDtd { + void *_private; /* application data */ + xmlElementType type; /* XML_DTD_NODE, must be second ! */ + const xmlChar *name; /* Name of the DTD */ + struct _xmlNode *children; /* the value of the property link */ + struct _xmlNode *last; /* last child link */ + struct _xmlDoc *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + /* End of common part */ + void *notations; /* Hash table for notations if any */ + void *elements; /* Hash table for elements if any */ + void *attributes; /* Hash table for attributes if any */ + void *entities; /* Hash table for entities if any */ + const xmlChar *ExternalID; /* External identifier for PUBLIC DTD */ + const xmlChar *SystemID; /* URI for a SYSTEM or PUBLIC DTD */ + void *pentities; /* Hash table for param entities if any */ +}; + +/** + * xmlAttr: + * + * An attribute on an XML node. + */ +typedef struct _xmlAttr xmlAttr; +typedef xmlAttr *xmlAttrPtr; +struct _xmlAttr { + void *_private; /* application data */ + xmlElementType type; /* XML_ATTRIBUTE_NODE, must be second ! */ + const xmlChar *name; /* the name of the property */ + struct _xmlNode *children; /* the value of the property */ + struct _xmlNode *last; /* NULL */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlAttr *next; /* next sibling link */ + struct _xmlAttr *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + xmlNs *ns; /* pointer to the associated namespace */ + xmlAttributeType atype; /* the attribute type if validating */ + void *psvi; /* for type/PSVI information */ +}; + +/** + * xmlID: + * + * An XML ID instance. + */ + +typedef struct _xmlID xmlID; +typedef xmlID *xmlIDPtr; +struct _xmlID { + struct _xmlID *next; /* next ID */ + const xmlChar *value; /* The ID name */ + xmlAttrPtr attr; /* The attribute holding it */ + const xmlChar *name; /* The attribute if attr is not available */ + int lineno; /* The line number if attr is not available */ + struct _xmlDoc *doc; /* The document holding the ID */ +}; + +/** + * xmlRef: + * + * An XML IDREF instance. + */ + +typedef struct _xmlRef xmlRef; +typedef xmlRef *xmlRefPtr; +struct _xmlRef { + struct _xmlRef *next; /* next Ref */ + const xmlChar *value; /* The Ref name */ + xmlAttrPtr attr; /* The attribute holding it */ + const xmlChar *name; /* The attribute if attr is not available */ + int lineno; /* The line number if attr is not available */ +}; + +/** + * xmlNode: + * + * A node in an XML tree. + */ +typedef struct _xmlNode xmlNode; +typedef xmlNode *xmlNodePtr; +struct _xmlNode { + void *_private; /* application data */ + xmlElementType type; /* type number, must be second ! */ + const xmlChar *name; /* the name of the node, or the entity */ + struct _xmlNode *children; /* parent->childs link */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* the containing document */ + + /* End of common part */ + xmlNs *ns; /* pointer to the associated namespace */ + xmlChar *content; /* the content */ + struct _xmlAttr *properties;/* properties list */ + xmlNs *nsDef; /* namespace definitions on this node */ + void *psvi; /* for type/PSVI information */ + unsigned short line; /* line number */ + unsigned short extra; /* extra data for XPath/XSLT */ +}; + +/** + * XML_GET_CONTENT: + * + * Macro to extract the content pointer of a node. + */ +#define XML_GET_CONTENT(n) \ + ((n)->type == XML_ELEMENT_NODE ? NULL : (n)->content) + +/** + * XML_GET_LINE: + * + * Macro to extract the line number of an element node. + */ +#define XML_GET_LINE(n) \ + (xmlGetLineNo(n)) + +/** + * xmlDocProperty + * + * Set of properties of the document as found by the parser + * Some of them are linked to similarly named xmlParserOption + */ +typedef enum { + XML_DOC_WELLFORMED = 1<<0, /* document is XML well formed */ + XML_DOC_NSVALID = 1<<1, /* document is Namespace valid */ + XML_DOC_OLD10 = 1<<2, /* parsed with old XML-1.0 parser */ + XML_DOC_DTDVALID = 1<<3, /* DTD validation was successful */ + XML_DOC_XINCLUDE = 1<<4, /* XInclude substitution was done */ + XML_DOC_USERBUILT = 1<<5, /* Document was built using the API + and not by parsing an instance */ + XML_DOC_INTERNAL = 1<<6, /* built for internal processing */ + XML_DOC_HTML = 1<<7 /* parsed or built HTML document */ +} xmlDocProperties; + +/** + * xmlDoc: + * + * An XML document. + */ +typedef struct _xmlDoc xmlDoc; +typedef xmlDoc *xmlDocPtr; +struct _xmlDoc { + void *_private; /* application data */ + xmlElementType type; /* XML_DOCUMENT_NODE, must be second ! */ + char *name; /* name/filename/URI of the document */ + struct _xmlNode *children; /* the document tree */ + struct _xmlNode *last; /* last child link */ + struct _xmlNode *parent; /* child->parent link */ + struct _xmlNode *next; /* next sibling link */ + struct _xmlNode *prev; /* previous sibling link */ + struct _xmlDoc *doc; /* autoreference to itself */ + + /* End of common part */ + int compression;/* level of zlib compression */ + int standalone; /* standalone document (no external refs) + 1 if standalone="yes" + 0 if standalone="no" + -1 if there is no XML declaration + -2 if there is an XML declaration, but no + standalone attribute was specified */ + struct _xmlDtd *intSubset; /* the document internal subset */ + struct _xmlDtd *extSubset; /* the document external subset */ + struct _xmlNs *oldNs; /* Global namespace, the old way */ + const xmlChar *version; /* the XML version string */ + const xmlChar *encoding; /* actual encoding, if any */ + void *ids; /* Hash table for ID attributes if any */ + void *refs; /* Hash table for IDREFs attributes if any */ + const xmlChar *URL; /* The URI for that document */ + int charset; /* unused */ + struct _xmlDict *dict; /* dict used to allocate names or NULL */ + void *psvi; /* for type/PSVI information */ + int parseFlags; /* set of xmlParserOption used to parse the + document */ + int properties; /* set of xmlDocProperties for this document + set at the end of parsing */ +}; + + +typedef struct _xmlDOMWrapCtxt xmlDOMWrapCtxt; +typedef xmlDOMWrapCtxt *xmlDOMWrapCtxtPtr; + +/** + * xmlDOMWrapAcquireNsFunction: + * @ctxt: a DOM wrapper context + * @node: the context node (element or attribute) + * @nsName: the requested namespace name + * @nsPrefix: the requested namespace prefix + * + * A function called to acquire namespaces (xmlNs) from the wrapper. + * + * Returns an xmlNsPtr or NULL in case of an error. + */ +typedef xmlNsPtr (*xmlDOMWrapAcquireNsFunction) (xmlDOMWrapCtxtPtr ctxt, + xmlNodePtr node, + const xmlChar *nsName, + const xmlChar *nsPrefix); + +/** + * xmlDOMWrapCtxt: + * + * Context for DOM wrapper-operations. + */ +struct _xmlDOMWrapCtxt { + void * _private; + /* + * The type of this context, just in case we need specialized + * contexts in the future. + */ + int type; + /* + * Internal namespace map used for various operations. + */ + void * namespaceMap; + /* + * Use this one to acquire an xmlNsPtr intended for node->ns. + * (Note that this is not intended for elem->nsDef). + */ + xmlDOMWrapAcquireNsFunction getNsForNodeFunc; +}; + +/** + * xmlRegisterNodeFunc: + * @node: the current node + * + * Signature for the registration callback of a created node + */ +typedef void (*xmlRegisterNodeFunc) (xmlNodePtr node); + +/** + * xmlDeregisterNodeFunc: + * @node: the current node + * + * Signature for the deregistration callback of a discarded node + */ +typedef void (*xmlDeregisterNodeFunc) (xmlNodePtr node); + +/** + * xmlChildrenNode: + * + * Macro for compatibility naming layer with libxml1. Maps + * to "children." + */ +#ifndef xmlChildrenNode +#define xmlChildrenNode children +#endif + +/** + * xmlRootNode: + * + * Macro for compatibility naming layer with libxml1. Maps + * to "children". + */ +#ifndef xmlRootNode +#define xmlRootNode children +#endif + +/* + * Variables. + */ + +/** DOC_DISABLE */ +#define XML_GLOBALS_TREE \ + XML_OP(xmlBufferAllocScheme, xmlBufferAllocationScheme, XML_DEPRECATED) \ + XML_OP(xmlDefaultBufferSize, int, XML_DEPRECATED) \ + XML_OP(xmlRegisterNodeDefaultValue, xmlRegisterNodeFunc, XML_DEPRECATED) \ + XML_OP(xmlDeregisterNodeDefaultValue, xmlDeregisterNodeFunc, \ + XML_DEPRECATED) + +#define XML_OP XML_DECLARE_GLOBAL +XML_GLOBALS_TREE +#undef XML_OP + +#if defined(LIBXML_THREAD_ENABLED) && !defined(XML_GLOBALS_NO_REDEFINITION) + #define xmlBufferAllocScheme XML_GLOBAL_MACRO(xmlBufferAllocScheme) + #define xmlDefaultBufferSize XML_GLOBAL_MACRO(xmlDefaultBufferSize) + #define xmlRegisterNodeDefaultValue \ + XML_GLOBAL_MACRO(xmlRegisterNodeDefaultValue) + #define xmlDeregisterNodeDefaultValue \ + XML_GLOBAL_MACRO(xmlDeregisterNodeDefaultValue) +#endif +/** DOC_ENABLE */ + +/* + * Some helper functions + */ +XMLPUBFUN int + xmlValidateNCName (const xmlChar *value, + int space); + +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) +XMLPUBFUN int + xmlValidateQName (const xmlChar *value, + int space); +XMLPUBFUN int + xmlValidateName (const xmlChar *value, + int space); +XMLPUBFUN int + xmlValidateNMToken (const xmlChar *value, + int space); +#endif + +XMLPUBFUN xmlChar * + xmlBuildQName (const xmlChar *ncname, + const xmlChar *prefix, + xmlChar *memory, + int len); +XMLPUBFUN xmlChar * + xmlSplitQName2 (const xmlChar *name, + xmlChar **prefix); +XMLPUBFUN const xmlChar * + xmlSplitQName3 (const xmlChar *name, + int *len); + +/* + * Handling Buffers, the old ones see @xmlBuf for the new ones. + */ + +XMLPUBFUN void + xmlSetBufferAllocationScheme(xmlBufferAllocationScheme scheme); +XMLPUBFUN xmlBufferAllocationScheme + xmlGetBufferAllocationScheme(void); + +XMLPUBFUN xmlBufferPtr + xmlBufferCreate (void); +XMLPUBFUN xmlBufferPtr + xmlBufferCreateSize (size_t size); +XMLPUBFUN xmlBufferPtr + xmlBufferCreateStatic (void *mem, + size_t size); +XMLPUBFUN int + xmlBufferResize (xmlBufferPtr buf, + unsigned int size); +XMLPUBFUN void + xmlBufferFree (xmlBufferPtr buf); +XMLPUBFUN int + xmlBufferDump (FILE *file, + xmlBufferPtr buf); +XMLPUBFUN int + xmlBufferAdd (xmlBufferPtr buf, + const xmlChar *str, + int len); +XMLPUBFUN int + xmlBufferAddHead (xmlBufferPtr buf, + const xmlChar *str, + int len); +XMLPUBFUN int + xmlBufferCat (xmlBufferPtr buf, + const xmlChar *str); +XMLPUBFUN int + xmlBufferCCat (xmlBufferPtr buf, + const char *str); +XMLPUBFUN int + xmlBufferShrink (xmlBufferPtr buf, + unsigned int len); +XMLPUBFUN int + xmlBufferGrow (xmlBufferPtr buf, + unsigned int len); +XMLPUBFUN void + xmlBufferEmpty (xmlBufferPtr buf); +XMLPUBFUN const xmlChar* + xmlBufferContent (const xmlBuffer *buf); +XMLPUBFUN xmlChar* + xmlBufferDetach (xmlBufferPtr buf); +XMLPUBFUN void + xmlBufferSetAllocationScheme(xmlBufferPtr buf, + xmlBufferAllocationScheme scheme); +XMLPUBFUN int + xmlBufferLength (const xmlBuffer *buf); + +/* + * Creating/freeing new structures. + */ +XMLPUBFUN xmlDtdPtr + xmlCreateIntSubset (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +XMLPUBFUN xmlDtdPtr + xmlNewDtd (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *ExternalID, + const xmlChar *SystemID); +XMLPUBFUN xmlDtdPtr + xmlGetIntSubset (const xmlDoc *doc); +XMLPUBFUN void + xmlFreeDtd (xmlDtdPtr cur); +#ifdef LIBXML_LEGACY_ENABLED +XML_DEPRECATED +XMLPUBFUN xmlNsPtr + xmlNewGlobalNs (xmlDocPtr doc, + const xmlChar *href, + const xmlChar *prefix); +#endif /* LIBXML_LEGACY_ENABLED */ +XMLPUBFUN xmlNsPtr + xmlNewNs (xmlNodePtr node, + const xmlChar *href, + const xmlChar *prefix); +XMLPUBFUN void + xmlFreeNs (xmlNsPtr cur); +XMLPUBFUN void + xmlFreeNsList (xmlNsPtr cur); +XMLPUBFUN xmlDocPtr + xmlNewDoc (const xmlChar *version); +XMLPUBFUN void + xmlFreeDoc (xmlDocPtr cur); +XMLPUBFUN xmlAttrPtr + xmlNewDocProp (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *value); +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_HTML_ENABLED) || \ + defined(LIBXML_SCHEMAS_ENABLED) +XMLPUBFUN xmlAttrPtr + xmlNewProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +#endif +XMLPUBFUN xmlAttrPtr + xmlNewNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +XMLPUBFUN xmlAttrPtr + xmlNewNsPropEatName (xmlNodePtr node, + xmlNsPtr ns, + xmlChar *name, + const xmlChar *value); +XMLPUBFUN void + xmlFreePropList (xmlAttrPtr cur); +XMLPUBFUN void + xmlFreeProp (xmlAttrPtr cur); +XMLPUBFUN xmlAttrPtr + xmlCopyProp (xmlNodePtr target, + xmlAttrPtr cur); +XMLPUBFUN xmlAttrPtr + xmlCopyPropList (xmlNodePtr target, + xmlAttrPtr cur); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlDtdPtr + xmlCopyDtd (xmlDtdPtr dtd); +#endif /* LIBXML_TREE_ENABLED */ +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) +XMLPUBFUN xmlDocPtr + xmlCopyDoc (xmlDocPtr doc, + int recursive); +#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) */ +/* + * Creating new nodes. + */ +XMLPUBFUN xmlNodePtr + xmlNewDocNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewDocNodeEatName (xmlDocPtr doc, + xmlNsPtr ns, + xmlChar *name, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewNode (xmlNsPtr ns, + const xmlChar *name); +XMLPUBFUN xmlNodePtr + xmlNewNodeEatName (xmlNsPtr ns, + xmlChar *name); +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) +XMLPUBFUN xmlNodePtr + xmlNewChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +#endif +XMLPUBFUN xmlNodePtr + xmlNewDocText (const xmlDoc *doc, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewText (const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewDocPI (xmlDocPtr doc, + const xmlChar *name, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewPI (const xmlChar *name, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewDocTextLen (xmlDocPtr doc, + const xmlChar *content, + int len); +XMLPUBFUN xmlNodePtr + xmlNewTextLen (const xmlChar *content, + int len); +XMLPUBFUN xmlNodePtr + xmlNewDocComment (xmlDocPtr doc, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewComment (const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewCDataBlock (xmlDocPtr doc, + const xmlChar *content, + int len); +XMLPUBFUN xmlNodePtr + xmlNewCharRef (xmlDocPtr doc, + const xmlChar *name); +XMLPUBFUN xmlNodePtr + xmlNewReference (const xmlDoc *doc, + const xmlChar *name); +XMLPUBFUN xmlNodePtr + xmlCopyNode (xmlNodePtr node, + int recursive); +XMLPUBFUN xmlNodePtr + xmlDocCopyNode (xmlNodePtr node, + xmlDocPtr doc, + int recursive); +XMLPUBFUN xmlNodePtr + xmlDocCopyNodeList (xmlDocPtr doc, + xmlNodePtr node); +XMLPUBFUN xmlNodePtr + xmlCopyNodeList (xmlNodePtr node); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlNodePtr + xmlNewTextChild (xmlNodePtr parent, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewDocRawNode (xmlDocPtr doc, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *content); +XMLPUBFUN xmlNodePtr + xmlNewDocFragment (xmlDocPtr doc); +#endif /* LIBXML_TREE_ENABLED */ + +/* + * Navigating. + */ +XMLPUBFUN long + xmlGetLineNo (const xmlNode *node); +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED) +XMLPUBFUN xmlChar * + xmlGetNodePath (const xmlNode *node); +#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_DEBUG_ENABLED) */ +XMLPUBFUN xmlNodePtr + xmlDocGetRootElement (const xmlDoc *doc); +XMLPUBFUN xmlNodePtr + xmlGetLastChild (const xmlNode *parent); +XMLPUBFUN int + xmlNodeIsText (const xmlNode *node); +XMLPUBFUN int + xmlIsBlankNode (const xmlNode *node); + +/* + * Changing the structure. + */ +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_WRITER_ENABLED) +XMLPUBFUN xmlNodePtr + xmlDocSetRootElement (xmlDocPtr doc, + xmlNodePtr root); +#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_WRITER_ENABLED) */ +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN void + xmlNodeSetName (xmlNodePtr cur, + const xmlChar *name); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN xmlNodePtr + xmlAddChild (xmlNodePtr parent, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr + xmlAddChildList (xmlNodePtr parent, + xmlNodePtr cur); +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_WRITER_ENABLED) +XMLPUBFUN xmlNodePtr + xmlReplaceNode (xmlNodePtr old, + xmlNodePtr cur); +#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_WRITER_ENABLED) */ +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_HTML_ENABLED) || \ + defined(LIBXML_SCHEMAS_ENABLED) || defined(LIBXML_XINCLUDE_ENABLED) +XMLPUBFUN xmlNodePtr + xmlAddPrevSibling (xmlNodePtr cur, + xmlNodePtr elem); +#endif /* LIBXML_TREE_ENABLED || LIBXML_HTML_ENABLED || LIBXML_SCHEMAS_ENABLED */ +XMLPUBFUN xmlNodePtr + xmlAddSibling (xmlNodePtr cur, + xmlNodePtr elem); +XMLPUBFUN xmlNodePtr + xmlAddNextSibling (xmlNodePtr cur, + xmlNodePtr elem); +XMLPUBFUN void + xmlUnlinkNode (xmlNodePtr cur); +XMLPUBFUN xmlNodePtr + xmlTextMerge (xmlNodePtr first, + xmlNodePtr second); +XMLPUBFUN int + xmlTextConcat (xmlNodePtr node, + const xmlChar *content, + int len); +XMLPUBFUN void + xmlFreeNodeList (xmlNodePtr cur); +XMLPUBFUN void + xmlFreeNode (xmlNodePtr cur); +XMLPUBFUN void + xmlSetTreeDoc (xmlNodePtr tree, + xmlDocPtr doc); +XMLPUBFUN void + xmlSetListDoc (xmlNodePtr list, + xmlDocPtr doc); +/* + * Namespaces. + */ +XMLPUBFUN xmlNsPtr + xmlSearchNs (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *nameSpace); +XMLPUBFUN xmlNsPtr + xmlSearchNsByHref (xmlDocPtr doc, + xmlNodePtr node, + const xmlChar *href); +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_XPATH_ENABLED) || \ + defined(LIBXML_SCHEMAS_ENABLED) +XMLPUBFUN xmlNsPtr * + xmlGetNsList (const xmlDoc *doc, + const xmlNode *node); +#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_XPATH_ENABLED) */ + +XMLPUBFUN void + xmlSetNs (xmlNodePtr node, + xmlNsPtr ns); +XMLPUBFUN xmlNsPtr + xmlCopyNamespace (xmlNsPtr cur); +XMLPUBFUN xmlNsPtr + xmlCopyNamespaceList (xmlNsPtr cur); + +/* + * Changing the content. + */ +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_XINCLUDE_ENABLED) || \ + defined(LIBXML_SCHEMAS_ENABLED) || defined(LIBXML_HTML_ENABLED) +XMLPUBFUN xmlAttrPtr + xmlSetProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *value); +XMLPUBFUN xmlAttrPtr + xmlSetNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name, + const xmlChar *value); +#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_XINCLUDE_ENABLED) || \ + defined(LIBXML_SCHEMAS_ENABLED) || defined(LIBXML_HTML_ENABLED) */ +XMLPUBFUN xmlChar * + xmlGetNoNsProp (const xmlNode *node, + const xmlChar *name); +XMLPUBFUN xmlChar * + xmlGetProp (const xmlNode *node, + const xmlChar *name); +XMLPUBFUN xmlAttrPtr + xmlHasProp (const xmlNode *node, + const xmlChar *name); +XMLPUBFUN xmlAttrPtr + xmlHasNsProp (const xmlNode *node, + const xmlChar *name, + const xmlChar *nameSpace); +XMLPUBFUN xmlChar * + xmlGetNsProp (const xmlNode *node, + const xmlChar *name, + const xmlChar *nameSpace); +XMLPUBFUN xmlNodePtr + xmlStringGetNodeList (const xmlDoc *doc, + const xmlChar *value); +XMLPUBFUN xmlNodePtr + xmlStringLenGetNodeList (const xmlDoc *doc, + const xmlChar *value, + int len); +XMLPUBFUN xmlChar * + xmlNodeListGetString (xmlDocPtr doc, + const xmlNode *list, + int inLine); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlChar * + xmlNodeListGetRawString (const xmlDoc *doc, + const xmlNode *list, + int inLine); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN void + xmlNodeSetContent (xmlNodePtr cur, + const xmlChar *content); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN void + xmlNodeSetContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN void + xmlNodeAddContent (xmlNodePtr cur, + const xmlChar *content); +XMLPUBFUN void + xmlNodeAddContentLen (xmlNodePtr cur, + const xmlChar *content, + int len); +XMLPUBFUN xmlChar * + xmlNodeGetContent (const xmlNode *cur); + +XMLPUBFUN int + xmlNodeBufGetContent (xmlBufferPtr buffer, + const xmlNode *cur); +XMLPUBFUN int + xmlBufGetNodeContent (xmlBufPtr buf, + const xmlNode *cur); + +XMLPUBFUN xmlChar * + xmlNodeGetLang (const xmlNode *cur); +XMLPUBFUN int + xmlNodeGetSpacePreserve (const xmlNode *cur); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN void + xmlNodeSetLang (xmlNodePtr cur, + const xmlChar *lang); +XMLPUBFUN void + xmlNodeSetSpacePreserve (xmlNodePtr cur, + int val); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN xmlChar * + xmlNodeGetBase (const xmlDoc *doc, + const xmlNode *cur); +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_XINCLUDE_ENABLED) +XMLPUBFUN void + xmlNodeSetBase (xmlNodePtr cur, + const xmlChar *uri); +#endif + +/* + * Removing content. + */ +XMLPUBFUN int + xmlRemoveProp (xmlAttrPtr cur); +#if defined(LIBXML_TREE_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) +XMLPUBFUN int + xmlUnsetNsProp (xmlNodePtr node, + xmlNsPtr ns, + const xmlChar *name); +XMLPUBFUN int + xmlUnsetProp (xmlNodePtr node, + const xmlChar *name); +#endif /* defined(LIBXML_TREE_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) */ + +/* + * Internal, don't use. + */ +XMLPUBFUN void + xmlBufferWriteCHAR (xmlBufferPtr buf, + const xmlChar *string); +XMLPUBFUN void + xmlBufferWriteChar (xmlBufferPtr buf, + const char *string); +XMLPUBFUN void + xmlBufferWriteQuotedString(xmlBufferPtr buf, + const xmlChar *string); + +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void xmlAttrSerializeTxtContent(xmlBufferPtr buf, + xmlDocPtr doc, + xmlAttrPtr attr, + const xmlChar *string); +#endif /* LIBXML_OUTPUT_ENABLED */ + +#ifdef LIBXML_TREE_ENABLED +/* + * Namespace handling. + */ +XMLPUBFUN int + xmlReconciliateNs (xmlDocPtr doc, + xmlNodePtr tree); +#endif + +#ifdef LIBXML_OUTPUT_ENABLED +/* + * Saving. + */ +XMLPUBFUN void + xmlDocDumpFormatMemory (xmlDocPtr cur, + xmlChar **mem, + int *size, + int format); +XMLPUBFUN void + xmlDocDumpMemory (xmlDocPtr cur, + xmlChar **mem, + int *size); +XMLPUBFUN void + xmlDocDumpMemoryEnc (xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding); +XMLPUBFUN void + xmlDocDumpFormatMemoryEnc(xmlDocPtr out_doc, + xmlChar **doc_txt_ptr, + int * doc_txt_len, + const char *txt_encoding, + int format); +XMLPUBFUN int + xmlDocFormatDump (FILE *f, + xmlDocPtr cur, + int format); +XMLPUBFUN int + xmlDocDump (FILE *f, + xmlDocPtr cur); +XMLPUBFUN void + xmlElemDump (FILE *f, + xmlDocPtr doc, + xmlNodePtr cur); +XMLPUBFUN int + xmlSaveFile (const char *filename, + xmlDocPtr cur); +XMLPUBFUN int + xmlSaveFormatFile (const char *filename, + xmlDocPtr cur, + int format); +XMLPUBFUN size_t + xmlBufNodeDump (xmlBufPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format); +XMLPUBFUN int + xmlNodeDump (xmlBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format); + +XMLPUBFUN int + xmlSaveFileTo (xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding); +XMLPUBFUN int + xmlSaveFormatFileTo (xmlOutputBufferPtr buf, + xmlDocPtr cur, + const char *encoding, + int format); +XMLPUBFUN void + xmlNodeDumpOutput (xmlOutputBufferPtr buf, + xmlDocPtr doc, + xmlNodePtr cur, + int level, + int format, + const char *encoding); + +XMLPUBFUN int + xmlSaveFormatFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding, + int format); + +XMLPUBFUN int + xmlSaveFileEnc (const char *filename, + xmlDocPtr cur, + const char *encoding); + +#endif /* LIBXML_OUTPUT_ENABLED */ +/* + * XHTML + */ +XMLPUBFUN int + xmlIsXHTML (const xmlChar *systemID, + const xmlChar *publicID); + +/* + * Compression. + */ +XMLPUBFUN int + xmlGetDocCompressMode (const xmlDoc *doc); +XMLPUBFUN void + xmlSetDocCompressMode (xmlDocPtr doc, + int mode); +XMLPUBFUN int + xmlGetCompressMode (void); +XMLPUBFUN void + xmlSetCompressMode (int mode); + +/* +* DOM-wrapper helper functions. +*/ +XMLPUBFUN xmlDOMWrapCtxtPtr + xmlDOMWrapNewCtxt (void); +XMLPUBFUN void + xmlDOMWrapFreeCtxt (xmlDOMWrapCtxtPtr ctxt); +XMLPUBFUN int + xmlDOMWrapReconcileNamespaces(xmlDOMWrapCtxtPtr ctxt, + xmlNodePtr elem, + int options); +XMLPUBFUN int + xmlDOMWrapAdoptNode (xmlDOMWrapCtxtPtr ctxt, + xmlDocPtr sourceDoc, + xmlNodePtr node, + xmlDocPtr destDoc, + xmlNodePtr destParent, + int options); +XMLPUBFUN int + xmlDOMWrapRemoveNode (xmlDOMWrapCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr node, + int options); +XMLPUBFUN int + xmlDOMWrapCloneNode (xmlDOMWrapCtxtPtr ctxt, + xmlDocPtr sourceDoc, + xmlNodePtr node, + xmlNodePtr *clonedNode, + xmlDocPtr destDoc, + xmlNodePtr destParent, + int deep, + int options); + +#ifdef LIBXML_TREE_ENABLED +/* + * 5 interfaces from DOM ElementTraversal, but different in entities + * traversal. + */ +XMLPUBFUN unsigned long + xmlChildElementCount (xmlNodePtr parent); +XMLPUBFUN xmlNodePtr + xmlNextElementSibling (xmlNodePtr node); +XMLPUBFUN xmlNodePtr + xmlFirstElementChild (xmlNodePtr parent); +XMLPUBFUN xmlNodePtr + xmlLastElementChild (xmlNodePtr parent); +XMLPUBFUN xmlNodePtr + xmlPreviousElementSibling (xmlNodePtr node); +#endif + +XMLPUBFUN xmlRegisterNodeFunc + xmlRegisterNodeDefault (xmlRegisterNodeFunc func); +XMLPUBFUN xmlDeregisterNodeFunc + xmlDeregisterNodeDefault (xmlDeregisterNodeFunc func); +XMLPUBFUN xmlRegisterNodeFunc + xmlThrDefRegisterNodeDefault(xmlRegisterNodeFunc func); +XMLPUBFUN xmlDeregisterNodeFunc + xmlThrDefDeregisterNodeDefault(xmlDeregisterNodeFunc func); + +XML_DEPRECATED XMLPUBFUN xmlBufferAllocationScheme + xmlThrDefBufferAllocScheme (xmlBufferAllocationScheme v); +XML_DEPRECATED XMLPUBFUN int + xmlThrDefDefaultBufferSize (int v); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_TREE_H__ */ + +#endif /* XML_TREE_INTERNALS */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/uri.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/uri.h new file mode 100644 index 00000000..eb8631cf --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/uri.h @@ -0,0 +1,95 @@ +/** + * Summary: library of generic URI related routines + * Description: library of generic URI related routines + * Implements RFC 2396 + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_URI_H__ +#define __XML_URI_H__ + +#include <stdio.h> +#include <libxml/xmlversion.h> +#include <libxml/xmlstring.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlURI: + * + * A parsed URI reference. This is a struct containing the various fields + * as described in RFC 2396 but separated for further processing. + * + * Note: query is a deprecated field which is incorrectly unescaped. + * query_raw takes precedence over query if the former is set. + * See: http://mail.gnome.org/archives/xml/2007-April/thread.html#00127 + */ +typedef struct _xmlURI xmlURI; +typedef xmlURI *xmlURIPtr; +struct _xmlURI { + char *scheme; /* the URI scheme */ + char *opaque; /* opaque part */ + char *authority; /* the authority part */ + char *server; /* the server part */ + char *user; /* the user part */ + int port; /* the port number */ + char *path; /* the path string */ + char *query; /* the query string (deprecated - use with caution) */ + char *fragment; /* the fragment identifier */ + int cleanup; /* parsing potentially unclean URI */ + char *query_raw; /* the query string (as it appears in the URI) */ +}; + +/* + * This function is in tree.h: + * xmlChar * xmlNodeGetBase (xmlDocPtr doc, + * xmlNodePtr cur); + */ +XMLPUBFUN xmlURIPtr + xmlCreateURI (void); +XMLPUBFUN xmlChar * + xmlBuildURI (const xmlChar *URI, + const xmlChar *base); +XMLPUBFUN xmlChar * + xmlBuildRelativeURI (const xmlChar *URI, + const xmlChar *base); +XMLPUBFUN xmlURIPtr + xmlParseURI (const char *str); +XMLPUBFUN xmlURIPtr + xmlParseURIRaw (const char *str, + int raw); +XMLPUBFUN int + xmlParseURIReference (xmlURIPtr uri, + const char *str); +XMLPUBFUN xmlChar * + xmlSaveUri (xmlURIPtr uri); +XMLPUBFUN void + xmlPrintURI (FILE *stream, + xmlURIPtr uri); +XMLPUBFUN xmlChar * + xmlURIEscapeStr (const xmlChar *str, + const xmlChar *list); +XMLPUBFUN char * + xmlURIUnescapeString (const char *str, + int len, + char *target); +XMLPUBFUN int + xmlNormalizeURIPath (char *path); +XMLPUBFUN xmlChar * + xmlURIEscape (const xmlChar *str); +XMLPUBFUN void + xmlFreeURI (xmlURIPtr uri); +XMLPUBFUN xmlChar* + xmlCanonicPath (const xmlChar *path); +XMLPUBFUN xmlChar* + xmlPathToURI (const xmlChar *path); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_URI_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/valid.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/valid.h new file mode 100644 index 00000000..3e04b552 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/valid.h @@ -0,0 +1,450 @@ +/* + * Summary: The DTD validation + * Description: API for the DTD handling and the validity checking + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __XML_VALID_H__ +#define __XML_VALID_H__ + +#include <libxml/xmlversion.h> +#include <libxml/xmlerror.h> +#define XML_TREE_INTERNALS +#include <libxml/tree.h> +#undef XML_TREE_INTERNALS +#include <libxml/list.h> +#include <libxml/xmlautomata.h> +#include <libxml/xmlregexp.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Validation state added for non-determinist content model. + */ +typedef struct _xmlValidState xmlValidState; +typedef xmlValidState *xmlValidStatePtr; + +/** + * xmlValidityErrorFunc: + * @ctx: usually an xmlValidCtxtPtr to a validity error context, + * but comes from ctxt->userData (which normally contains such + * a pointer); ctxt->userData can be changed by the user. + * @msg: the string to format *printf like vararg + * @...: remaining arguments to the format + * + * Callback called when a validity error is found. This is a message + * oriented function similar to an *printf function. + */ +typedef void (*xmlValidityErrorFunc) (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); + +/** + * xmlValidityWarningFunc: + * @ctx: usually an xmlValidCtxtPtr to a validity error context, + * but comes from ctxt->userData (which normally contains such + * a pointer); ctxt->userData can be changed by the user. + * @msg: the string to format *printf like vararg + * @...: remaining arguments to the format + * + * Callback called when a validity warning is found. This is a message + * oriented function similar to an *printf function. + */ +typedef void (*xmlValidityWarningFunc) (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); + +/* + * xmlValidCtxt: + * An xmlValidCtxt is used for error reporting when validating. + */ +typedef struct _xmlValidCtxt xmlValidCtxt; +typedef xmlValidCtxt *xmlValidCtxtPtr; +struct _xmlValidCtxt { + void *userData; /* user specific data block */ + xmlValidityErrorFunc error; /* the callback in case of errors */ + xmlValidityWarningFunc warning; /* the callback in case of warning */ + + /* Node analysis stack used when validating within entities */ + xmlNodePtr node; /* Current parsed Node */ + int nodeNr; /* Depth of the parsing stack */ + int nodeMax; /* Max depth of the parsing stack */ + xmlNodePtr *nodeTab; /* array of nodes */ + + unsigned int flags; /* internal flags */ + xmlDocPtr doc; /* the document */ + int valid; /* temporary validity check result */ + + /* state state used for non-determinist content validation */ + xmlValidState *vstate; /* current state */ + int vstateNr; /* Depth of the validation stack */ + int vstateMax; /* Max depth of the validation stack */ + xmlValidState *vstateTab; /* array of validation states */ + +#ifdef LIBXML_REGEXP_ENABLED + xmlAutomataPtr am; /* the automata */ + xmlAutomataStatePtr state; /* used to build the automata */ +#else + void *am; + void *state; +#endif +}; + +/* + * ALL notation declarations are stored in a table. + * There is one table per DTD. + */ + +typedef struct _xmlHashTable xmlNotationTable; +typedef xmlNotationTable *xmlNotationTablePtr; + +/* + * ALL element declarations are stored in a table. + * There is one table per DTD. + */ + +typedef struct _xmlHashTable xmlElementTable; +typedef xmlElementTable *xmlElementTablePtr; + +/* + * ALL attribute declarations are stored in a table. + * There is one table per DTD. + */ + +typedef struct _xmlHashTable xmlAttributeTable; +typedef xmlAttributeTable *xmlAttributeTablePtr; + +/* + * ALL IDs attributes are stored in a table. + * There is one table per document. + */ + +typedef struct _xmlHashTable xmlIDTable; +typedef xmlIDTable *xmlIDTablePtr; + +/* + * ALL Refs attributes are stored in a table. + * There is one table per document. + */ + +typedef struct _xmlHashTable xmlRefTable; +typedef xmlRefTable *xmlRefTablePtr; + +/* Notation */ +XMLPUBFUN xmlNotationPtr + xmlAddNotationDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + const xmlChar *PublicID, + const xmlChar *SystemID); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlNotationTablePtr + xmlCopyNotationTable (xmlNotationTablePtr table); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN void + xmlFreeNotationTable (xmlNotationTablePtr table); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlDumpNotationDecl (xmlBufferPtr buf, + xmlNotationPtr nota); +XMLPUBFUN void + xmlDumpNotationTable (xmlBufferPtr buf, + xmlNotationTablePtr table); +#endif /* LIBXML_OUTPUT_ENABLED */ + +/* Element Content */ +/* the non Doc version are being deprecated */ +XMLPUBFUN xmlElementContentPtr + xmlNewElementContent (const xmlChar *name, + xmlElementContentType type); +XMLPUBFUN xmlElementContentPtr + xmlCopyElementContent (xmlElementContentPtr content); +XMLPUBFUN void + xmlFreeElementContent (xmlElementContentPtr cur); +/* the new versions with doc argument */ +XMLPUBFUN xmlElementContentPtr + xmlNewDocElementContent (xmlDocPtr doc, + const xmlChar *name, + xmlElementContentType type); +XMLPUBFUN xmlElementContentPtr + xmlCopyDocElementContent(xmlDocPtr doc, + xmlElementContentPtr content); +XMLPUBFUN void + xmlFreeDocElementContent(xmlDocPtr doc, + xmlElementContentPtr cur); +XMLPUBFUN void + xmlSnprintfElementContent(char *buf, + int size, + xmlElementContentPtr content, + int englob); +#ifdef LIBXML_OUTPUT_ENABLED +/* DEPRECATED */ +XMLPUBFUN void + xmlSprintfElementContent(char *buf, + xmlElementContentPtr content, + int englob); +#endif /* LIBXML_OUTPUT_ENABLED */ +/* DEPRECATED */ + +/* Element */ +XMLPUBFUN xmlElementPtr + xmlAddElementDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *name, + xmlElementTypeVal type, + xmlElementContentPtr content); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlElementTablePtr + xmlCopyElementTable (xmlElementTablePtr table); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN void + xmlFreeElementTable (xmlElementTablePtr table); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlDumpElementTable (xmlBufferPtr buf, + xmlElementTablePtr table); +XMLPUBFUN void + xmlDumpElementDecl (xmlBufferPtr buf, + xmlElementPtr elem); +#endif /* LIBXML_OUTPUT_ENABLED */ + +/* Enumeration */ +XMLPUBFUN xmlEnumerationPtr + xmlCreateEnumeration (const xmlChar *name); +XMLPUBFUN void + xmlFreeEnumeration (xmlEnumerationPtr cur); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlEnumerationPtr + xmlCopyEnumeration (xmlEnumerationPtr cur); +#endif /* LIBXML_TREE_ENABLED */ + +/* Attribute */ +XMLPUBFUN xmlAttributePtr + xmlAddAttributeDecl (xmlValidCtxtPtr ctxt, + xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name, + const xmlChar *ns, + xmlAttributeType type, + xmlAttributeDefault def, + const xmlChar *defaultValue, + xmlEnumerationPtr tree); +#ifdef LIBXML_TREE_ENABLED +XMLPUBFUN xmlAttributeTablePtr + xmlCopyAttributeTable (xmlAttributeTablePtr table); +#endif /* LIBXML_TREE_ENABLED */ +XMLPUBFUN void + xmlFreeAttributeTable (xmlAttributeTablePtr table); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlDumpAttributeTable (xmlBufferPtr buf, + xmlAttributeTablePtr table); +XMLPUBFUN void + xmlDumpAttributeDecl (xmlBufferPtr buf, + xmlAttributePtr attr); +#endif /* LIBXML_OUTPUT_ENABLED */ + +/* IDs */ +XMLPUBFUN xmlIDPtr + xmlAddID (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +XMLPUBFUN void + xmlFreeIDTable (xmlIDTablePtr table); +XMLPUBFUN xmlAttrPtr + xmlGetID (xmlDocPtr doc, + const xmlChar *ID); +XMLPUBFUN int + xmlIsID (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +XMLPUBFUN int + xmlRemoveID (xmlDocPtr doc, + xmlAttrPtr attr); + +/* IDREFs */ +XML_DEPRECATED +XMLPUBFUN xmlRefPtr + xmlAddRef (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *value, + xmlAttrPtr attr); +XML_DEPRECATED +XMLPUBFUN void + xmlFreeRefTable (xmlRefTablePtr table); +XML_DEPRECATED +XMLPUBFUN int + xmlIsRef (xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr); +XML_DEPRECATED +XMLPUBFUN int + xmlRemoveRef (xmlDocPtr doc, + xmlAttrPtr attr); +XML_DEPRECATED +XMLPUBFUN xmlListPtr + xmlGetRefs (xmlDocPtr doc, + const xmlChar *ID); + +/** + * The public function calls related to validity checking. + */ +#ifdef LIBXML_VALID_ENABLED +/* Allocate/Release Validation Contexts */ +XMLPUBFUN xmlValidCtxtPtr + xmlNewValidCtxt(void); +XMLPUBFUN void + xmlFreeValidCtxt(xmlValidCtxtPtr); + +XMLPUBFUN int + xmlValidateRoot (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +XMLPUBFUN int + xmlValidateElementDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlElementPtr elem); +XMLPUBFUN xmlChar * + xmlValidNormalizeAttributeValue(xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *name, + const xmlChar *value); +XMLPUBFUN xmlChar * + xmlValidCtxtNormalizeAttributeValue(xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *name, + const xmlChar *value); +XMLPUBFUN int + xmlValidateAttributeDecl(xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlAttributePtr attr); +XMLPUBFUN int + xmlValidateAttributeValue(xmlAttributeType type, + const xmlChar *value); +XMLPUBFUN int + xmlValidateNotationDecl (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNotationPtr nota); +XMLPUBFUN int + xmlValidateDtd (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlDtdPtr dtd); +XMLPUBFUN int + xmlValidateDtdFinal (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +XMLPUBFUN int + xmlValidateDocument (xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +XMLPUBFUN int + xmlValidateElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +XMLPUBFUN int + xmlValidateOneElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem); +XMLPUBFUN int + xmlValidateOneAttribute (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + xmlAttrPtr attr, + const xmlChar *value); +XMLPUBFUN int + xmlValidateOneNamespace (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *prefix, + xmlNsPtr ns, + const xmlChar *value); +XMLPUBFUN int + xmlValidateDocumentFinal(xmlValidCtxtPtr ctxt, + xmlDocPtr doc); +#endif /* LIBXML_VALID_ENABLED */ + +#if defined(LIBXML_VALID_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) +XMLPUBFUN int + xmlValidateNotationUse (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + const xmlChar *notationName); +#endif /* LIBXML_VALID_ENABLED or LIBXML_SCHEMAS_ENABLED */ + +XMLPUBFUN int + xmlIsMixedElement (xmlDocPtr doc, + const xmlChar *name); +XMLPUBFUN xmlAttributePtr + xmlGetDtdAttrDesc (xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name); +XMLPUBFUN xmlAttributePtr + xmlGetDtdQAttrDesc (xmlDtdPtr dtd, + const xmlChar *elem, + const xmlChar *name, + const xmlChar *prefix); +XMLPUBFUN xmlNotationPtr + xmlGetDtdNotationDesc (xmlDtdPtr dtd, + const xmlChar *name); +XMLPUBFUN xmlElementPtr + xmlGetDtdQElementDesc (xmlDtdPtr dtd, + const xmlChar *name, + const xmlChar *prefix); +XMLPUBFUN xmlElementPtr + xmlGetDtdElementDesc (xmlDtdPtr dtd, + const xmlChar *name); + +#ifdef LIBXML_VALID_ENABLED + +XMLPUBFUN int + xmlValidGetPotentialChildren(xmlElementContent *ctree, + const xmlChar **names, + int *len, + int max); + +XMLPUBFUN int + xmlValidGetValidElements(xmlNode *prev, + xmlNode *next, + const xmlChar **names, + int max); +XMLPUBFUN int + xmlValidateNameValue (const xmlChar *value); +XMLPUBFUN int + xmlValidateNamesValue (const xmlChar *value); +XMLPUBFUN int + xmlValidateNmtokenValue (const xmlChar *value); +XMLPUBFUN int + xmlValidateNmtokensValue(const xmlChar *value); + +#ifdef LIBXML_REGEXP_ENABLED +/* + * Validation based on the regexp support + */ +XMLPUBFUN int + xmlValidBuildContentModel(xmlValidCtxtPtr ctxt, + xmlElementPtr elem); + +XMLPUBFUN int + xmlValidatePushElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *qname); +XMLPUBFUN int + xmlValidatePushCData (xmlValidCtxtPtr ctxt, + const xmlChar *data, + int len); +XMLPUBFUN int + xmlValidatePopElement (xmlValidCtxtPtr ctxt, + xmlDocPtr doc, + xmlNodePtr elem, + const xmlChar *qname); +#endif /* LIBXML_REGEXP_ENABLED */ +#endif /* LIBXML_VALID_ENABLED */ +#ifdef __cplusplus +} +#endif +#endif /* __XML_VALID_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xinclude.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xinclude.h new file mode 100644 index 00000000..e1d135b3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xinclude.h @@ -0,0 +1,129 @@ +/* + * Summary: implementation of XInclude + * Description: API to handle XInclude processing, + * implements the + * World Wide Web Consortium Last Call Working Draft 10 November 2003 + * http://www.w3.org/TR/2003/WD-xinclude-20031110 + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XINCLUDE_H__ +#define __XML_XINCLUDE_H__ + +#include <libxml/xmlversion.h> +#include <libxml/tree.h> + +#ifdef LIBXML_XINCLUDE_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XINCLUDE_NS: + * + * Macro defining the Xinclude namespace: http://www.w3.org/2003/XInclude + */ +#define XINCLUDE_NS (const xmlChar *) "http://www.w3.org/2003/XInclude" +/** + * XINCLUDE_OLD_NS: + * + * Macro defining the draft Xinclude namespace: http://www.w3.org/2001/XInclude + */ +#define XINCLUDE_OLD_NS (const xmlChar *) "http://www.w3.org/2001/XInclude" +/** + * XINCLUDE_NODE: + * + * Macro defining "include" + */ +#define XINCLUDE_NODE (const xmlChar *) "include" +/** + * XINCLUDE_FALLBACK: + * + * Macro defining "fallback" + */ +#define XINCLUDE_FALLBACK (const xmlChar *) "fallback" +/** + * XINCLUDE_HREF: + * + * Macro defining "href" + */ +#define XINCLUDE_HREF (const xmlChar *) "href" +/** + * XINCLUDE_PARSE: + * + * Macro defining "parse" + */ +#define XINCLUDE_PARSE (const xmlChar *) "parse" +/** + * XINCLUDE_PARSE_XML: + * + * Macro defining "xml" + */ +#define XINCLUDE_PARSE_XML (const xmlChar *) "xml" +/** + * XINCLUDE_PARSE_TEXT: + * + * Macro defining "text" + */ +#define XINCLUDE_PARSE_TEXT (const xmlChar *) "text" +/** + * XINCLUDE_PARSE_ENCODING: + * + * Macro defining "encoding" + */ +#define XINCLUDE_PARSE_ENCODING (const xmlChar *) "encoding" +/** + * XINCLUDE_PARSE_XPOINTER: + * + * Macro defining "xpointer" + */ +#define XINCLUDE_PARSE_XPOINTER (const xmlChar *) "xpointer" + +typedef struct _xmlXIncludeCtxt xmlXIncludeCtxt; +typedef xmlXIncludeCtxt *xmlXIncludeCtxtPtr; + +/* + * standalone processing + */ +XMLPUBFUN int + xmlXIncludeProcess (xmlDocPtr doc); +XMLPUBFUN int + xmlXIncludeProcessFlags (xmlDocPtr doc, + int flags); +XMLPUBFUN int + xmlXIncludeProcessFlagsData(xmlDocPtr doc, + int flags, + void *data); +XMLPUBFUN int + xmlXIncludeProcessTreeFlagsData(xmlNodePtr tree, + int flags, + void *data); +XMLPUBFUN int + xmlXIncludeProcessTree (xmlNodePtr tree); +XMLPUBFUN int + xmlXIncludeProcessTreeFlags(xmlNodePtr tree, + int flags); +/* + * contextual processing + */ +XMLPUBFUN xmlXIncludeCtxtPtr + xmlXIncludeNewContext (xmlDocPtr doc); +XMLPUBFUN int + xmlXIncludeSetFlags (xmlXIncludeCtxtPtr ctxt, + int flags); +XMLPUBFUN void + xmlXIncludeFreeContext (xmlXIncludeCtxtPtr ctxt); +XMLPUBFUN int + xmlXIncludeProcessNode (xmlXIncludeCtxtPtr ctxt, + xmlNodePtr tree); +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_XINCLUDE_ENABLED */ + +#endif /* __XML_XINCLUDE_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xlink.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xlink.h new file mode 100644 index 00000000..10657366 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xlink.h @@ -0,0 +1,189 @@ +/* + * Summary: unfinished XLink detection module + * Description: unfinished XLink detection module + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XLINK_H__ +#define __XML_XLINK_H__ + +#include <libxml/xmlversion.h> +#include <libxml/tree.h> + +#ifdef LIBXML_XPTR_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Various defines for the various Link properties. + * + * NOTE: the link detection layer will try to resolve QName expansion + * of namespaces. If "foo" is the prefix for "http://foo.com/" + * then the link detection layer will expand role="foo:myrole" + * to "http://foo.com/:myrole". + * NOTE: the link detection layer will expand URI-References found on + * href attributes by using the base mechanism if found. + */ +typedef xmlChar *xlinkHRef; +typedef xmlChar *xlinkRole; +typedef xmlChar *xlinkTitle; + +typedef enum { + XLINK_TYPE_NONE = 0, + XLINK_TYPE_SIMPLE, + XLINK_TYPE_EXTENDED, + XLINK_TYPE_EXTENDED_SET +} xlinkType; + +typedef enum { + XLINK_SHOW_NONE = 0, + XLINK_SHOW_NEW, + XLINK_SHOW_EMBED, + XLINK_SHOW_REPLACE +} xlinkShow; + +typedef enum { + XLINK_ACTUATE_NONE = 0, + XLINK_ACTUATE_AUTO, + XLINK_ACTUATE_ONREQUEST +} xlinkActuate; + +/** + * xlinkNodeDetectFunc: + * @ctx: user data pointer + * @node: the node to check + * + * This is the prototype for the link detection routine. + * It calls the default link detection callbacks upon link detection. + */ +typedef void (*xlinkNodeDetectFunc) (void *ctx, xmlNodePtr node); + +/* + * The link detection module interact with the upper layers using + * a set of callback registered at parsing time. + */ + +/** + * xlinkSimpleLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @href: the target of the link + * @role: the role string + * @title: the link title + * + * This is the prototype for a simple link detection callback. + */ +typedef void +(*xlinkSimpleLinkFunk) (void *ctx, + xmlNodePtr node, + const xlinkHRef href, + const xlinkRole role, + const xlinkTitle title); + +/** + * xlinkExtendedLinkFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbArcs: the number of arcs detected on the link + * @from: pointer to the array of source roles found on the arcs + * @to: pointer to the array of target roles found on the arcs + * @show: array of values for the show attributes found on the arcs + * @actuate: array of values for the actuate attributes found on the arcs + * @nbTitles: the number of titles detected on the link + * @title: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for a extended link detection callback. + */ +typedef void +(*xlinkExtendedLinkFunk)(void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbArcs, + const xlinkRole *from, + const xlinkRole *to, + xlinkShow *show, + xlinkActuate *actuate, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * xlinkExtendedLinkSetFunk: + * @ctx: user data pointer + * @node: the node carrying the link + * @nbLocators: the number of locators detected on the link + * @hrefs: pointer to the array of locator hrefs + * @roles: pointer to the array of locator roles + * @nbTitles: the number of titles detected on the link + * @title: array of titles detected on the link + * @langs: array of xml:lang values for the titles + * + * This is the prototype for a extended link set detection callback. + */ +typedef void +(*xlinkExtendedLinkSetFunk) (void *ctx, + xmlNodePtr node, + int nbLocators, + const xlinkHRef *hrefs, + const xlinkRole *roles, + int nbTitles, + const xlinkTitle *titles, + const xmlChar **langs); + +/** + * This is the structure containing a set of Links detection callbacks. + * + * There is no default xlink callbacks, if one want to get link + * recognition activated, those call backs must be provided before parsing. + */ +typedef struct _xlinkHandler xlinkHandler; +typedef xlinkHandler *xlinkHandlerPtr; +struct _xlinkHandler { + xlinkSimpleLinkFunk simple; + xlinkExtendedLinkFunk extended; + xlinkExtendedLinkSetFunk set; +}; + +/* + * The default detection routine, can be overridden, they call the default + * detection callbacks. + */ + +XMLPUBFUN xlinkNodeDetectFunc + xlinkGetDefaultDetect (void); +XMLPUBFUN void + xlinkSetDefaultDetect (xlinkNodeDetectFunc func); + +/* + * Routines to set/get the default handlers. + */ +XMLPUBFUN xlinkHandlerPtr + xlinkGetDefaultHandler (void); +XMLPUBFUN void + xlinkSetDefaultHandler (xlinkHandlerPtr handler); + +/* + * Link detection module itself. + */ +XMLPUBFUN xlinkType + xlinkIsLink (xmlDocPtr doc, + xmlNodePtr node); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_XPTR_ENABLED */ + +#endif /* __XML_XLINK_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlIO.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlIO.h new file mode 100644 index 00000000..2487be3b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlIO.h @@ -0,0 +1,421 @@ +/* + * Summary: interface for the I/O interfaces used by the parser + * Description: interface for the I/O interfaces used by the parser + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_IO_H__ +#define __XML_IO_H__ + +#include <stdio.h> +#include <libxml/xmlversion.h> +#include <libxml/encoding.h> +#define XML_TREE_INTERNALS +#include <libxml/tree.h> +#undef XML_TREE_INTERNALS + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Those are the functions and datatypes for the parser input + * I/O structures. + */ + +/** + * xmlInputMatchCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Input API to detect if the current handler + * can provide input functionality for this resource. + * + * Returns 1 if yes and 0 if another Input module should be used + */ +typedef int (*xmlInputMatchCallback) (char const *filename); +/** + * xmlInputOpenCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Input API to open the resource + * + * Returns an Input context or NULL in case or error + */ +typedef void * (*xmlInputOpenCallback) (char const *filename); +/** + * xmlInputReadCallback: + * @context: an Input context + * @buffer: the buffer to store data read + * @len: the length of the buffer in bytes + * + * Callback used in the I/O Input API to read the resource + * + * Returns the number of bytes read or -1 in case of error + */ +typedef int (*xmlInputReadCallback) (void * context, char * buffer, int len); +/** + * xmlInputCloseCallback: + * @context: an Input context + * + * Callback used in the I/O Input API to close the resource + * + * Returns 0 or -1 in case of error + */ +typedef int (*xmlInputCloseCallback) (void * context); + +#ifdef LIBXML_OUTPUT_ENABLED +/* + * Those are the functions and datatypes for the library output + * I/O structures. + */ + +/** + * xmlOutputMatchCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Output API to detect if the current handler + * can provide output functionality for this resource. + * + * Returns 1 if yes and 0 if another Output module should be used + */ +typedef int (*xmlOutputMatchCallback) (char const *filename); +/** + * xmlOutputOpenCallback: + * @filename: the filename or URI + * + * Callback used in the I/O Output API to open the resource + * + * Returns an Output context or NULL in case or error + */ +typedef void * (*xmlOutputOpenCallback) (char const *filename); +/** + * xmlOutputWriteCallback: + * @context: an Output context + * @buffer: the buffer of data to write + * @len: the length of the buffer in bytes + * + * Callback used in the I/O Output API to write to the resource + * + * Returns the number of bytes written or -1 in case of error + */ +typedef int (*xmlOutputWriteCallback) (void * context, const char * buffer, + int len); +/** + * xmlOutputCloseCallback: + * @context: an Output context + * + * Callback used in the I/O Output API to close the resource + * + * Returns 0 or -1 in case of error + */ +typedef int (*xmlOutputCloseCallback) (void * context); +#endif /* LIBXML_OUTPUT_ENABLED */ + +/** + * xmlParserInputBufferCreateFilenameFunc: + * @URI: the URI to read from + * @enc: the requested source encoding + * + * Signature for the function doing the lookup for a suitable input method + * corresponding to an URI. + * + * Returns the new xmlParserInputBufferPtr in case of success or NULL if no + * method was found. + */ +typedef xmlParserInputBufferPtr +(*xmlParserInputBufferCreateFilenameFunc)(const char *URI, xmlCharEncoding enc); + +/** + * xmlOutputBufferCreateFilenameFunc: + * @URI: the URI to write to + * @enc: the requested target encoding + * + * Signature for the function doing the lookup for a suitable output method + * corresponding to an URI. + * + * Returns the new xmlOutputBufferPtr in case of success or NULL if no + * method was found. + */ +typedef xmlOutputBufferPtr +(*xmlOutputBufferCreateFilenameFunc)(const char *URI, + xmlCharEncodingHandlerPtr encoder, int compression); + +struct _xmlParserInputBuffer { + void* context; + xmlInputReadCallback readcallback; + xmlInputCloseCallback closecallback; + + xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */ + + xmlBufPtr buffer; /* Local buffer encoded in UTF-8 */ + xmlBufPtr raw; /* if encoder != NULL buffer for raw input */ + int compressed; /* -1=unknown, 0=not compressed, 1=compressed */ + int error; + unsigned long rawconsumed;/* amount consumed from raw */ +}; + + +#ifdef LIBXML_OUTPUT_ENABLED +struct _xmlOutputBuffer { + void* context; + xmlOutputWriteCallback writecallback; + xmlOutputCloseCallback closecallback; + + xmlCharEncodingHandlerPtr encoder; /* I18N conversions to UTF-8 */ + + xmlBufPtr buffer; /* Local buffer encoded in UTF-8 or ISOLatin */ + xmlBufPtr conv; /* if encoder != NULL buffer for output */ + int written; /* total number of byte written */ + int error; +}; +#endif /* LIBXML_OUTPUT_ENABLED */ + +/** DOC_DISABLE */ +#define XML_GLOBALS_IO \ + XML_OP(xmlParserInputBufferCreateFilenameValue, \ + xmlParserInputBufferCreateFilenameFunc, XML_DEPRECATED) \ + XML_OP(xmlOutputBufferCreateFilenameValue, \ + xmlOutputBufferCreateFilenameFunc, XML_DEPRECATED) + +#define XML_OP XML_DECLARE_GLOBAL +XML_GLOBALS_IO +#undef XML_OP + +#if defined(LIBXML_THREAD_ENABLED) && !defined(XML_GLOBALS_NO_REDEFINITION) + #define xmlParserInputBufferCreateFilenameValue \ + XML_GLOBAL_MACRO(xmlParserInputBufferCreateFilenameValue) + #define xmlOutputBufferCreateFilenameValue \ + XML_GLOBAL_MACRO(xmlOutputBufferCreateFilenameValue) +#endif +/** DOC_ENABLE */ + +/* + * Interfaces for input + */ +XMLPUBFUN void + xmlCleanupInputCallbacks (void); + +XMLPUBFUN int + xmlPopInputCallbacks (void); + +XMLPUBFUN void + xmlRegisterDefaultInputCallbacks (void); +XMLPUBFUN xmlParserInputBufferPtr + xmlAllocParserInputBuffer (xmlCharEncoding enc); + +XMLPUBFUN xmlParserInputBufferPtr + xmlParserInputBufferCreateFilename (const char *URI, + xmlCharEncoding enc); +XMLPUBFUN xmlParserInputBufferPtr + xmlParserInputBufferCreateFile (FILE *file, + xmlCharEncoding enc); +XMLPUBFUN xmlParserInputBufferPtr + xmlParserInputBufferCreateFd (int fd, + xmlCharEncoding enc); +XMLPUBFUN xmlParserInputBufferPtr + xmlParserInputBufferCreateMem (const char *mem, int size, + xmlCharEncoding enc); +XMLPUBFUN xmlParserInputBufferPtr + xmlParserInputBufferCreateStatic (const char *mem, int size, + xmlCharEncoding enc); +XMLPUBFUN xmlParserInputBufferPtr + xmlParserInputBufferCreateIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + xmlCharEncoding enc); +XMLPUBFUN int + xmlParserInputBufferRead (xmlParserInputBufferPtr in, + int len); +XMLPUBFUN int + xmlParserInputBufferGrow (xmlParserInputBufferPtr in, + int len); +XMLPUBFUN int + xmlParserInputBufferPush (xmlParserInputBufferPtr in, + int len, + const char *buf); +XMLPUBFUN void + xmlFreeParserInputBuffer (xmlParserInputBufferPtr in); +XMLPUBFUN char * + xmlParserGetDirectory (const char *filename); + +XMLPUBFUN int + xmlRegisterInputCallbacks (xmlInputMatchCallback matchFunc, + xmlInputOpenCallback openFunc, + xmlInputReadCallback readFunc, + xmlInputCloseCallback closeFunc); + +xmlParserInputBufferPtr + __xmlParserInputBufferCreateFilename(const char *URI, + xmlCharEncoding enc); + +#ifdef LIBXML_OUTPUT_ENABLED +/* + * Interfaces for output + */ +XMLPUBFUN void + xmlCleanupOutputCallbacks (void); +XMLPUBFUN int + xmlPopOutputCallbacks (void); +XMLPUBFUN void + xmlRegisterDefaultOutputCallbacks(void); +XMLPUBFUN xmlOutputBufferPtr + xmlAllocOutputBuffer (xmlCharEncodingHandlerPtr encoder); + +XMLPUBFUN xmlOutputBufferPtr + xmlOutputBufferCreateFilename (const char *URI, + xmlCharEncodingHandlerPtr encoder, + int compression); + +XMLPUBFUN xmlOutputBufferPtr + xmlOutputBufferCreateFile (FILE *file, + xmlCharEncodingHandlerPtr encoder); + +XMLPUBFUN xmlOutputBufferPtr + xmlOutputBufferCreateBuffer (xmlBufferPtr buffer, + xmlCharEncodingHandlerPtr encoder); + +XMLPUBFUN xmlOutputBufferPtr + xmlOutputBufferCreateFd (int fd, + xmlCharEncodingHandlerPtr encoder); + +XMLPUBFUN xmlOutputBufferPtr + xmlOutputBufferCreateIO (xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, + void *ioctx, + xmlCharEncodingHandlerPtr encoder); + +/* Couple of APIs to get the output without digging into the buffers */ +XMLPUBFUN const xmlChar * + xmlOutputBufferGetContent (xmlOutputBufferPtr out); +XMLPUBFUN size_t + xmlOutputBufferGetSize (xmlOutputBufferPtr out); + +XMLPUBFUN int + xmlOutputBufferWrite (xmlOutputBufferPtr out, + int len, + const char *buf); +XMLPUBFUN int + xmlOutputBufferWriteString (xmlOutputBufferPtr out, + const char *str); +XMLPUBFUN int + xmlOutputBufferWriteEscape (xmlOutputBufferPtr out, + const xmlChar *str, + xmlCharEncodingOutputFunc escaping); + +XMLPUBFUN int + xmlOutputBufferFlush (xmlOutputBufferPtr out); +XMLPUBFUN int + xmlOutputBufferClose (xmlOutputBufferPtr out); + +XMLPUBFUN int + xmlRegisterOutputCallbacks (xmlOutputMatchCallback matchFunc, + xmlOutputOpenCallback openFunc, + xmlOutputWriteCallback writeFunc, + xmlOutputCloseCallback closeFunc); + +xmlOutputBufferPtr + __xmlOutputBufferCreateFilename(const char *URI, + xmlCharEncodingHandlerPtr encoder, + int compression); + +#ifdef LIBXML_HTTP_ENABLED +/* This function only exists if HTTP support built into the library */ +XMLPUBFUN void + xmlRegisterHTTPPostCallbacks (void ); +#endif /* LIBXML_HTTP_ENABLED */ + +#endif /* LIBXML_OUTPUT_ENABLED */ + +XMLPUBFUN xmlParserInputPtr + xmlCheckHTTPInput (xmlParserCtxtPtr ctxt, + xmlParserInputPtr ret); + +/* + * A predefined entity loader disabling network accesses + */ +XMLPUBFUN xmlParserInputPtr + xmlNoNetExternalEntityLoader (const char *URL, + const char *ID, + xmlParserCtxtPtr ctxt); + +/* + * xmlNormalizeWindowsPath is obsolete, don't use it. + * Check xmlCanonicPath in uri.h for a better alternative. + */ +XMLPUBFUN xmlChar * + xmlNormalizeWindowsPath (const xmlChar *path); + +XMLPUBFUN int + xmlCheckFilename (const char *path); +/** + * Default 'file://' protocol callbacks + */ +XMLPUBFUN int + xmlFileMatch (const char *filename); +XMLPUBFUN void * + xmlFileOpen (const char *filename); +XMLPUBFUN int + xmlFileRead (void * context, + char * buffer, + int len); +XMLPUBFUN int + xmlFileClose (void * context); + +/** + * Default 'http://' protocol callbacks + */ +#ifdef LIBXML_HTTP_ENABLED +XMLPUBFUN int + xmlIOHTTPMatch (const char *filename); +XMLPUBFUN void * + xmlIOHTTPOpen (const char *filename); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void * + xmlIOHTTPOpenW (const char * post_uri, + int compression ); +#endif /* LIBXML_OUTPUT_ENABLED */ +XMLPUBFUN int + xmlIOHTTPRead (void * context, + char * buffer, + int len); +XMLPUBFUN int + xmlIOHTTPClose (void * context); +#endif /* LIBXML_HTTP_ENABLED */ + +/** + * Default 'ftp://' protocol callbacks + */ +#if defined(LIBXML_FTP_ENABLED) +XMLPUBFUN int + xmlIOFTPMatch (const char *filename); +XMLPUBFUN void * + xmlIOFTPOpen (const char *filename); +XMLPUBFUN int + xmlIOFTPRead (void * context, + char * buffer, + int len); +XMLPUBFUN int + xmlIOFTPClose (void * context); +#endif /* defined(LIBXML_FTP_ENABLED) */ + +XMLPUBFUN xmlParserInputBufferCreateFilenameFunc + xmlParserInputBufferCreateFilenameDefault( + xmlParserInputBufferCreateFilenameFunc func); +XMLPUBFUN xmlOutputBufferCreateFilenameFunc + xmlOutputBufferCreateFilenameDefault( + xmlOutputBufferCreateFilenameFunc func); +XMLPUBFUN xmlOutputBufferCreateFilenameFunc + xmlThrDefOutputBufferCreateFilenameDefault( + xmlOutputBufferCreateFilenameFunc func); +XMLPUBFUN xmlParserInputBufferCreateFilenameFunc + xmlThrDefParserInputBufferCreateFilenameDefault( + xmlParserInputBufferCreateFilenameFunc func); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_IO_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlautomata.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlautomata.h new file mode 100644 index 00000000..ea38eb37 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlautomata.h @@ -0,0 +1,146 @@ +/* + * Summary: API to build regexp automata + * Description: the API to build regexp automata + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_AUTOMATA_H__ +#define __XML_AUTOMATA_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_REGEXP_ENABLED +#ifdef LIBXML_AUTOMATA_ENABLED + +#include <libxml/xmlstring.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlAutomataPtr: + * + * A libxml automata description, It can be compiled into a regexp + */ +typedef struct _xmlAutomata xmlAutomata; +typedef xmlAutomata *xmlAutomataPtr; + +/** + * xmlAutomataStatePtr: + * + * A state int the automata description, + */ +typedef struct _xmlAutomataState xmlAutomataState; +typedef xmlAutomataState *xmlAutomataStatePtr; + +/* + * Building API + */ +XMLPUBFUN xmlAutomataPtr + xmlNewAutomata (void); +XMLPUBFUN void + xmlFreeAutomata (xmlAutomataPtr am); + +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataGetInitState (xmlAutomataPtr am); +XMLPUBFUN int + xmlAutomataSetFinalState (xmlAutomataPtr am, + xmlAutomataStatePtr state); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewState (xmlAutomataPtr am); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewTransition (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + void *data); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewTransition2 (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + const xmlChar *token2, + void *data); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewNegTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + const xmlChar *token2, + void *data); + +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewCountTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + int min, + int max, + void *data); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewCountTrans2 (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + const xmlChar *token2, + int min, + int max, + void *data); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewOnceTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + int min, + int max, + void *data); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewOnceTrans2 (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + const xmlChar *token, + const xmlChar *token2, + int min, + int max, + void *data); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewAllTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + int lax); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewEpsilon (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewCountedTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + int counter); +XMLPUBFUN xmlAutomataStatePtr + xmlAutomataNewCounterTrans (xmlAutomataPtr am, + xmlAutomataStatePtr from, + xmlAutomataStatePtr to, + int counter); +XMLPUBFUN int + xmlAutomataNewCounter (xmlAutomataPtr am, + int min, + int max); + +XMLPUBFUN struct _xmlRegexp * + xmlAutomataCompile (xmlAutomataPtr am); +XMLPUBFUN int + xmlAutomataIsDeterminist (xmlAutomataPtr am); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_AUTOMATA_ENABLED */ +#endif /* LIBXML_REGEXP_ENABLED */ + +#endif /* __XML_AUTOMATA_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlerror.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlerror.h new file mode 100644 index 00000000..1f0ab4a3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlerror.h @@ -0,0 +1,948 @@ +/* + * Summary: error handling + * Description: the API used to report errors + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_ERROR_H__ +#define __XML_ERROR_H__ + +#include <libxml/xmlversion.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlErrorLevel: + * + * Indicates the level of an error + */ +typedef enum { + XML_ERR_NONE = 0, + XML_ERR_WARNING = 1, /* A simple warning */ + XML_ERR_ERROR = 2, /* A recoverable error */ + XML_ERR_FATAL = 3 /* A fatal error */ +} xmlErrorLevel; + +/** + * xmlErrorDomain: + * + * Indicates where an error may have come from + */ +typedef enum { + XML_FROM_NONE = 0, + XML_FROM_PARSER, /* The XML parser */ + XML_FROM_TREE, /* The tree module */ + XML_FROM_NAMESPACE, /* The XML Namespace module */ + XML_FROM_DTD, /* The XML DTD validation with parser context*/ + XML_FROM_HTML, /* The HTML parser */ + XML_FROM_MEMORY, /* The memory allocator */ + XML_FROM_OUTPUT, /* The serialization code */ + XML_FROM_IO, /* The Input/Output stack */ + XML_FROM_FTP, /* The FTP module */ + XML_FROM_HTTP, /* The HTTP module */ + XML_FROM_XINCLUDE, /* The XInclude processing */ + XML_FROM_XPATH, /* The XPath module */ + XML_FROM_XPOINTER, /* The XPointer module */ + XML_FROM_REGEXP, /* The regular expressions module */ + XML_FROM_DATATYPE, /* The W3C XML Schemas Datatype module */ + XML_FROM_SCHEMASP, /* The W3C XML Schemas parser module */ + XML_FROM_SCHEMASV, /* The W3C XML Schemas validation module */ + XML_FROM_RELAXNGP, /* The Relax-NG parser module */ + XML_FROM_RELAXNGV, /* The Relax-NG validator module */ + XML_FROM_CATALOG, /* The Catalog module */ + XML_FROM_C14N, /* The Canonicalization module */ + XML_FROM_XSLT, /* The XSLT engine from libxslt */ + XML_FROM_VALID, /* The XML DTD validation with valid context */ + XML_FROM_CHECK, /* The error checking module */ + XML_FROM_WRITER, /* The xmlwriter module */ + XML_FROM_MODULE, /* The dynamically loaded module module*/ + XML_FROM_I18N, /* The module handling character conversion */ + XML_FROM_SCHEMATRONV,/* The Schematron validator module */ + XML_FROM_BUFFER, /* The buffers module */ + XML_FROM_URI /* The URI module */ +} xmlErrorDomain; + +/** + * xmlError: + * + * An XML Error instance. + */ + +typedef struct _xmlError xmlError; +typedef xmlError *xmlErrorPtr; +struct _xmlError { + int domain; /* What part of the library raised this error */ + int code; /* The error code, e.g. an xmlParserError */ + char *message;/* human-readable informative error message */ + xmlErrorLevel level;/* how consequent is the error */ + char *file; /* the filename */ + int line; /* the line number if available */ + char *str1; /* extra string information */ + char *str2; /* extra string information */ + char *str3; /* extra string information */ + int int1; /* extra number information */ + int int2; /* error column # or 0 if N/A (todo: rename field when we would brk ABI) */ + void *ctxt; /* the parser context if available */ + void *node; /* the node in the tree */ +}; + +/** + * xmlParserError: + * + * This is an error that the XML (or HTML) parser can generate + */ +typedef enum { + XML_ERR_OK = 0, + XML_ERR_INTERNAL_ERROR, /* 1 */ + XML_ERR_NO_MEMORY, /* 2 */ + XML_ERR_DOCUMENT_START, /* 3 */ + XML_ERR_DOCUMENT_EMPTY, /* 4 */ + XML_ERR_DOCUMENT_END, /* 5 */ + XML_ERR_INVALID_HEX_CHARREF, /* 6 */ + XML_ERR_INVALID_DEC_CHARREF, /* 7 */ + XML_ERR_INVALID_CHARREF, /* 8 */ + XML_ERR_INVALID_CHAR, /* 9 */ + XML_ERR_CHARREF_AT_EOF, /* 10 */ + XML_ERR_CHARREF_IN_PROLOG, /* 11 */ + XML_ERR_CHARREF_IN_EPILOG, /* 12 */ + XML_ERR_CHARREF_IN_DTD, /* 13 */ + XML_ERR_ENTITYREF_AT_EOF, /* 14 */ + XML_ERR_ENTITYREF_IN_PROLOG, /* 15 */ + XML_ERR_ENTITYREF_IN_EPILOG, /* 16 */ + XML_ERR_ENTITYREF_IN_DTD, /* 17 */ + XML_ERR_PEREF_AT_EOF, /* 18 */ + XML_ERR_PEREF_IN_PROLOG, /* 19 */ + XML_ERR_PEREF_IN_EPILOG, /* 20 */ + XML_ERR_PEREF_IN_INT_SUBSET, /* 21 */ + XML_ERR_ENTITYREF_NO_NAME, /* 22 */ + XML_ERR_ENTITYREF_SEMICOL_MISSING, /* 23 */ + XML_ERR_PEREF_NO_NAME, /* 24 */ + XML_ERR_PEREF_SEMICOL_MISSING, /* 25 */ + XML_ERR_UNDECLARED_ENTITY, /* 26 */ + XML_WAR_UNDECLARED_ENTITY, /* 27 */ + XML_ERR_UNPARSED_ENTITY, /* 28 */ + XML_ERR_ENTITY_IS_EXTERNAL, /* 29 */ + XML_ERR_ENTITY_IS_PARAMETER, /* 30 */ + XML_ERR_UNKNOWN_ENCODING, /* 31 */ + XML_ERR_UNSUPPORTED_ENCODING, /* 32 */ + XML_ERR_STRING_NOT_STARTED, /* 33 */ + XML_ERR_STRING_NOT_CLOSED, /* 34 */ + XML_ERR_NS_DECL_ERROR, /* 35 */ + XML_ERR_ENTITY_NOT_STARTED, /* 36 */ + XML_ERR_ENTITY_NOT_FINISHED, /* 37 */ + XML_ERR_LT_IN_ATTRIBUTE, /* 38 */ + XML_ERR_ATTRIBUTE_NOT_STARTED, /* 39 */ + XML_ERR_ATTRIBUTE_NOT_FINISHED, /* 40 */ + XML_ERR_ATTRIBUTE_WITHOUT_VALUE, /* 41 */ + XML_ERR_ATTRIBUTE_REDEFINED, /* 42 */ + XML_ERR_LITERAL_NOT_STARTED, /* 43 */ + XML_ERR_LITERAL_NOT_FINISHED, /* 44 */ + XML_ERR_COMMENT_NOT_FINISHED, /* 45 */ + XML_ERR_PI_NOT_STARTED, /* 46 */ + XML_ERR_PI_NOT_FINISHED, /* 47 */ + XML_ERR_NOTATION_NOT_STARTED, /* 48 */ + XML_ERR_NOTATION_NOT_FINISHED, /* 49 */ + XML_ERR_ATTLIST_NOT_STARTED, /* 50 */ + XML_ERR_ATTLIST_NOT_FINISHED, /* 51 */ + XML_ERR_MIXED_NOT_STARTED, /* 52 */ + XML_ERR_MIXED_NOT_FINISHED, /* 53 */ + XML_ERR_ELEMCONTENT_NOT_STARTED, /* 54 */ + XML_ERR_ELEMCONTENT_NOT_FINISHED, /* 55 */ + XML_ERR_XMLDECL_NOT_STARTED, /* 56 */ + XML_ERR_XMLDECL_NOT_FINISHED, /* 57 */ + XML_ERR_CONDSEC_NOT_STARTED, /* 58 */ + XML_ERR_CONDSEC_NOT_FINISHED, /* 59 */ + XML_ERR_EXT_SUBSET_NOT_FINISHED, /* 60 */ + XML_ERR_DOCTYPE_NOT_FINISHED, /* 61 */ + XML_ERR_MISPLACED_CDATA_END, /* 62 */ + XML_ERR_CDATA_NOT_FINISHED, /* 63 */ + XML_ERR_RESERVED_XML_NAME, /* 64 */ + XML_ERR_SPACE_REQUIRED, /* 65 */ + XML_ERR_SEPARATOR_REQUIRED, /* 66 */ + XML_ERR_NMTOKEN_REQUIRED, /* 67 */ + XML_ERR_NAME_REQUIRED, /* 68 */ + XML_ERR_PCDATA_REQUIRED, /* 69 */ + XML_ERR_URI_REQUIRED, /* 70 */ + XML_ERR_PUBID_REQUIRED, /* 71 */ + XML_ERR_LT_REQUIRED, /* 72 */ + XML_ERR_GT_REQUIRED, /* 73 */ + XML_ERR_LTSLASH_REQUIRED, /* 74 */ + XML_ERR_EQUAL_REQUIRED, /* 75 */ + XML_ERR_TAG_NAME_MISMATCH, /* 76 */ + XML_ERR_TAG_NOT_FINISHED, /* 77 */ + XML_ERR_STANDALONE_VALUE, /* 78 */ + XML_ERR_ENCODING_NAME, /* 79 */ + XML_ERR_HYPHEN_IN_COMMENT, /* 80 */ + XML_ERR_INVALID_ENCODING, /* 81 */ + XML_ERR_EXT_ENTITY_STANDALONE, /* 82 */ + XML_ERR_CONDSEC_INVALID, /* 83 */ + XML_ERR_VALUE_REQUIRED, /* 84 */ + XML_ERR_NOT_WELL_BALANCED, /* 85 */ + XML_ERR_EXTRA_CONTENT, /* 86 */ + XML_ERR_ENTITY_CHAR_ERROR, /* 87 */ + XML_ERR_ENTITY_PE_INTERNAL, /* 88 */ + XML_ERR_ENTITY_LOOP, /* 89 */ + XML_ERR_ENTITY_BOUNDARY, /* 90 */ + XML_ERR_INVALID_URI, /* 91 */ + XML_ERR_URI_FRAGMENT, /* 92 */ + XML_WAR_CATALOG_PI, /* 93 */ + XML_ERR_NO_DTD, /* 94 */ + XML_ERR_CONDSEC_INVALID_KEYWORD, /* 95 */ + XML_ERR_VERSION_MISSING, /* 96 */ + XML_WAR_UNKNOWN_VERSION, /* 97 */ + XML_WAR_LANG_VALUE, /* 98 */ + XML_WAR_NS_URI, /* 99 */ + XML_WAR_NS_URI_RELATIVE, /* 100 */ + XML_ERR_MISSING_ENCODING, /* 101 */ + XML_WAR_SPACE_VALUE, /* 102 */ + XML_ERR_NOT_STANDALONE, /* 103 */ + XML_ERR_ENTITY_PROCESSING, /* 104 */ + XML_ERR_NOTATION_PROCESSING, /* 105 */ + XML_WAR_NS_COLUMN, /* 106 */ + XML_WAR_ENTITY_REDEFINED, /* 107 */ + XML_ERR_UNKNOWN_VERSION, /* 108 */ + XML_ERR_VERSION_MISMATCH, /* 109 */ + XML_ERR_NAME_TOO_LONG, /* 110 */ + XML_ERR_USER_STOP, /* 111 */ + XML_ERR_COMMENT_ABRUPTLY_ENDED, /* 112 */ + XML_WAR_ENCODING_MISMATCH, /* 113 */ + XML_NS_ERR_XML_NAMESPACE = 200, + XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */ + XML_NS_ERR_QNAME, /* 202 */ + XML_NS_ERR_ATTRIBUTE_REDEFINED, /* 203 */ + XML_NS_ERR_EMPTY, /* 204 */ + XML_NS_ERR_COLON, /* 205 */ + XML_DTD_ATTRIBUTE_DEFAULT = 500, + XML_DTD_ATTRIBUTE_REDEFINED, /* 501 */ + XML_DTD_ATTRIBUTE_VALUE, /* 502 */ + XML_DTD_CONTENT_ERROR, /* 503 */ + XML_DTD_CONTENT_MODEL, /* 504 */ + XML_DTD_CONTENT_NOT_DETERMINIST, /* 505 */ + XML_DTD_DIFFERENT_PREFIX, /* 506 */ + XML_DTD_ELEM_DEFAULT_NAMESPACE, /* 507 */ + XML_DTD_ELEM_NAMESPACE, /* 508 */ + XML_DTD_ELEM_REDEFINED, /* 509 */ + XML_DTD_EMPTY_NOTATION, /* 510 */ + XML_DTD_ENTITY_TYPE, /* 511 */ + XML_DTD_ID_FIXED, /* 512 */ + XML_DTD_ID_REDEFINED, /* 513 */ + XML_DTD_ID_SUBSET, /* 514 */ + XML_DTD_INVALID_CHILD, /* 515 */ + XML_DTD_INVALID_DEFAULT, /* 516 */ + XML_DTD_LOAD_ERROR, /* 517 */ + XML_DTD_MISSING_ATTRIBUTE, /* 518 */ + XML_DTD_MIXED_CORRUPT, /* 519 */ + XML_DTD_MULTIPLE_ID, /* 520 */ + XML_DTD_NO_DOC, /* 521 */ + XML_DTD_NO_DTD, /* 522 */ + XML_DTD_NO_ELEM_NAME, /* 523 */ + XML_DTD_NO_PREFIX, /* 524 */ + XML_DTD_NO_ROOT, /* 525 */ + XML_DTD_NOTATION_REDEFINED, /* 526 */ + XML_DTD_NOTATION_VALUE, /* 527 */ + XML_DTD_NOT_EMPTY, /* 528 */ + XML_DTD_NOT_PCDATA, /* 529 */ + XML_DTD_NOT_STANDALONE, /* 530 */ + XML_DTD_ROOT_NAME, /* 531 */ + XML_DTD_STANDALONE_WHITE_SPACE, /* 532 */ + XML_DTD_UNKNOWN_ATTRIBUTE, /* 533 */ + XML_DTD_UNKNOWN_ELEM, /* 534 */ + XML_DTD_UNKNOWN_ENTITY, /* 535 */ + XML_DTD_UNKNOWN_ID, /* 536 */ + XML_DTD_UNKNOWN_NOTATION, /* 537 */ + XML_DTD_STANDALONE_DEFAULTED, /* 538 */ + XML_DTD_XMLID_VALUE, /* 539 */ + XML_DTD_XMLID_TYPE, /* 540 */ + XML_DTD_DUP_TOKEN, /* 541 */ + XML_HTML_STRUCURE_ERROR = 800, + XML_HTML_UNKNOWN_TAG, /* 801 */ + XML_HTML_INCORRECTLY_OPENED_COMMENT, /* 802 */ + XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000, + XML_RNGP_ATTR_CONFLICT, /* 1001 */ + XML_RNGP_ATTRIBUTE_CHILDREN, /* 1002 */ + XML_RNGP_ATTRIBUTE_CONTENT, /* 1003 */ + XML_RNGP_ATTRIBUTE_EMPTY, /* 1004 */ + XML_RNGP_ATTRIBUTE_NOOP, /* 1005 */ + XML_RNGP_CHOICE_CONTENT, /* 1006 */ + XML_RNGP_CHOICE_EMPTY, /* 1007 */ + XML_RNGP_CREATE_FAILURE, /* 1008 */ + XML_RNGP_DATA_CONTENT, /* 1009 */ + XML_RNGP_DEF_CHOICE_AND_INTERLEAVE, /* 1010 */ + XML_RNGP_DEFINE_CREATE_FAILED, /* 1011 */ + XML_RNGP_DEFINE_EMPTY, /* 1012 */ + XML_RNGP_DEFINE_MISSING, /* 1013 */ + XML_RNGP_DEFINE_NAME_MISSING, /* 1014 */ + XML_RNGP_ELEM_CONTENT_EMPTY, /* 1015 */ + XML_RNGP_ELEM_CONTENT_ERROR, /* 1016 */ + XML_RNGP_ELEMENT_EMPTY, /* 1017 */ + XML_RNGP_ELEMENT_CONTENT, /* 1018 */ + XML_RNGP_ELEMENT_NAME, /* 1019 */ + XML_RNGP_ELEMENT_NO_CONTENT, /* 1020 */ + XML_RNGP_ELEM_TEXT_CONFLICT, /* 1021 */ + XML_RNGP_EMPTY, /* 1022 */ + XML_RNGP_EMPTY_CONSTRUCT, /* 1023 */ + XML_RNGP_EMPTY_CONTENT, /* 1024 */ + XML_RNGP_EMPTY_NOT_EMPTY, /* 1025 */ + XML_RNGP_ERROR_TYPE_LIB, /* 1026 */ + XML_RNGP_EXCEPT_EMPTY, /* 1027 */ + XML_RNGP_EXCEPT_MISSING, /* 1028 */ + XML_RNGP_EXCEPT_MULTIPLE, /* 1029 */ + XML_RNGP_EXCEPT_NO_CONTENT, /* 1030 */ + XML_RNGP_EXTERNALREF_EMTPY, /* 1031 */ + XML_RNGP_EXTERNAL_REF_FAILURE, /* 1032 */ + XML_RNGP_EXTERNALREF_RECURSE, /* 1033 */ + XML_RNGP_FORBIDDEN_ATTRIBUTE, /* 1034 */ + XML_RNGP_FOREIGN_ELEMENT, /* 1035 */ + XML_RNGP_GRAMMAR_CONTENT, /* 1036 */ + XML_RNGP_GRAMMAR_EMPTY, /* 1037 */ + XML_RNGP_GRAMMAR_MISSING, /* 1038 */ + XML_RNGP_GRAMMAR_NO_START, /* 1039 */ + XML_RNGP_GROUP_ATTR_CONFLICT, /* 1040 */ + XML_RNGP_HREF_ERROR, /* 1041 */ + XML_RNGP_INCLUDE_EMPTY, /* 1042 */ + XML_RNGP_INCLUDE_FAILURE, /* 1043 */ + XML_RNGP_INCLUDE_RECURSE, /* 1044 */ + XML_RNGP_INTERLEAVE_ADD, /* 1045 */ + XML_RNGP_INTERLEAVE_CREATE_FAILED, /* 1046 */ + XML_RNGP_INTERLEAVE_EMPTY, /* 1047 */ + XML_RNGP_INTERLEAVE_NO_CONTENT, /* 1048 */ + XML_RNGP_INVALID_DEFINE_NAME, /* 1049 */ + XML_RNGP_INVALID_URI, /* 1050 */ + XML_RNGP_INVALID_VALUE, /* 1051 */ + XML_RNGP_MISSING_HREF, /* 1052 */ + XML_RNGP_NAME_MISSING, /* 1053 */ + XML_RNGP_NEED_COMBINE, /* 1054 */ + XML_RNGP_NOTALLOWED_NOT_EMPTY, /* 1055 */ + XML_RNGP_NSNAME_ATTR_ANCESTOR, /* 1056 */ + XML_RNGP_NSNAME_NO_NS, /* 1057 */ + XML_RNGP_PARAM_FORBIDDEN, /* 1058 */ + XML_RNGP_PARAM_NAME_MISSING, /* 1059 */ + XML_RNGP_PARENTREF_CREATE_FAILED, /* 1060 */ + XML_RNGP_PARENTREF_NAME_INVALID, /* 1061 */ + XML_RNGP_PARENTREF_NO_NAME, /* 1062 */ + XML_RNGP_PARENTREF_NO_PARENT, /* 1063 */ + XML_RNGP_PARENTREF_NOT_EMPTY, /* 1064 */ + XML_RNGP_PARSE_ERROR, /* 1065 */ + XML_RNGP_PAT_ANYNAME_EXCEPT_ANYNAME, /* 1066 */ + XML_RNGP_PAT_ATTR_ATTR, /* 1067 */ + XML_RNGP_PAT_ATTR_ELEM, /* 1068 */ + XML_RNGP_PAT_DATA_EXCEPT_ATTR, /* 1069 */ + XML_RNGP_PAT_DATA_EXCEPT_ELEM, /* 1070 */ + XML_RNGP_PAT_DATA_EXCEPT_EMPTY, /* 1071 */ + XML_RNGP_PAT_DATA_EXCEPT_GROUP, /* 1072 */ + XML_RNGP_PAT_DATA_EXCEPT_INTERLEAVE, /* 1073 */ + XML_RNGP_PAT_DATA_EXCEPT_LIST, /* 1074 */ + XML_RNGP_PAT_DATA_EXCEPT_ONEMORE, /* 1075 */ + XML_RNGP_PAT_DATA_EXCEPT_REF, /* 1076 */ + XML_RNGP_PAT_DATA_EXCEPT_TEXT, /* 1077 */ + XML_RNGP_PAT_LIST_ATTR, /* 1078 */ + XML_RNGP_PAT_LIST_ELEM, /* 1079 */ + XML_RNGP_PAT_LIST_INTERLEAVE, /* 1080 */ + XML_RNGP_PAT_LIST_LIST, /* 1081 */ + XML_RNGP_PAT_LIST_REF, /* 1082 */ + XML_RNGP_PAT_LIST_TEXT, /* 1083 */ + XML_RNGP_PAT_NSNAME_EXCEPT_ANYNAME, /* 1084 */ + XML_RNGP_PAT_NSNAME_EXCEPT_NSNAME, /* 1085 */ + XML_RNGP_PAT_ONEMORE_GROUP_ATTR, /* 1086 */ + XML_RNGP_PAT_ONEMORE_INTERLEAVE_ATTR, /* 1087 */ + XML_RNGP_PAT_START_ATTR, /* 1088 */ + XML_RNGP_PAT_START_DATA, /* 1089 */ + XML_RNGP_PAT_START_EMPTY, /* 1090 */ + XML_RNGP_PAT_START_GROUP, /* 1091 */ + XML_RNGP_PAT_START_INTERLEAVE, /* 1092 */ + XML_RNGP_PAT_START_LIST, /* 1093 */ + XML_RNGP_PAT_START_ONEMORE, /* 1094 */ + XML_RNGP_PAT_START_TEXT, /* 1095 */ + XML_RNGP_PAT_START_VALUE, /* 1096 */ + XML_RNGP_PREFIX_UNDEFINED, /* 1097 */ + XML_RNGP_REF_CREATE_FAILED, /* 1098 */ + XML_RNGP_REF_CYCLE, /* 1099 */ + XML_RNGP_REF_NAME_INVALID, /* 1100 */ + XML_RNGP_REF_NO_DEF, /* 1101 */ + XML_RNGP_REF_NO_NAME, /* 1102 */ + XML_RNGP_REF_NOT_EMPTY, /* 1103 */ + XML_RNGP_START_CHOICE_AND_INTERLEAVE, /* 1104 */ + XML_RNGP_START_CONTENT, /* 1105 */ + XML_RNGP_START_EMPTY, /* 1106 */ + XML_RNGP_START_MISSING, /* 1107 */ + XML_RNGP_TEXT_EXPECTED, /* 1108 */ + XML_RNGP_TEXT_HAS_CHILD, /* 1109 */ + XML_RNGP_TYPE_MISSING, /* 1110 */ + XML_RNGP_TYPE_NOT_FOUND, /* 1111 */ + XML_RNGP_TYPE_VALUE, /* 1112 */ + XML_RNGP_UNKNOWN_ATTRIBUTE, /* 1113 */ + XML_RNGP_UNKNOWN_COMBINE, /* 1114 */ + XML_RNGP_UNKNOWN_CONSTRUCT, /* 1115 */ + XML_RNGP_UNKNOWN_TYPE_LIB, /* 1116 */ + XML_RNGP_URI_FRAGMENT, /* 1117 */ + XML_RNGP_URI_NOT_ABSOLUTE, /* 1118 */ + XML_RNGP_VALUE_EMPTY, /* 1119 */ + XML_RNGP_VALUE_NO_CONTENT, /* 1120 */ + XML_RNGP_XMLNS_NAME, /* 1121 */ + XML_RNGP_XML_NS, /* 1122 */ + XML_XPATH_EXPRESSION_OK = 1200, + XML_XPATH_NUMBER_ERROR, /* 1201 */ + XML_XPATH_UNFINISHED_LITERAL_ERROR, /* 1202 */ + XML_XPATH_START_LITERAL_ERROR, /* 1203 */ + XML_XPATH_VARIABLE_REF_ERROR, /* 1204 */ + XML_XPATH_UNDEF_VARIABLE_ERROR, /* 1205 */ + XML_XPATH_INVALID_PREDICATE_ERROR, /* 1206 */ + XML_XPATH_EXPR_ERROR, /* 1207 */ + XML_XPATH_UNCLOSED_ERROR, /* 1208 */ + XML_XPATH_UNKNOWN_FUNC_ERROR, /* 1209 */ + XML_XPATH_INVALID_OPERAND, /* 1210 */ + XML_XPATH_INVALID_TYPE, /* 1211 */ + XML_XPATH_INVALID_ARITY, /* 1212 */ + XML_XPATH_INVALID_CTXT_SIZE, /* 1213 */ + XML_XPATH_INVALID_CTXT_POSITION, /* 1214 */ + XML_XPATH_MEMORY_ERROR, /* 1215 */ + XML_XPTR_SYNTAX_ERROR, /* 1216 */ + XML_XPTR_RESOURCE_ERROR, /* 1217 */ + XML_XPTR_SUB_RESOURCE_ERROR, /* 1218 */ + XML_XPATH_UNDEF_PREFIX_ERROR, /* 1219 */ + XML_XPATH_ENCODING_ERROR, /* 1220 */ + XML_XPATH_INVALID_CHAR_ERROR, /* 1221 */ + XML_TREE_INVALID_HEX = 1300, + XML_TREE_INVALID_DEC, /* 1301 */ + XML_TREE_UNTERMINATED_ENTITY, /* 1302 */ + XML_TREE_NOT_UTF8, /* 1303 */ + XML_SAVE_NOT_UTF8 = 1400, + XML_SAVE_CHAR_INVALID, /* 1401 */ + XML_SAVE_NO_DOCTYPE, /* 1402 */ + XML_SAVE_UNKNOWN_ENCODING, /* 1403 */ + XML_REGEXP_COMPILE_ERROR = 1450, + XML_IO_UNKNOWN = 1500, + XML_IO_EACCES, /* 1501 */ + XML_IO_EAGAIN, /* 1502 */ + XML_IO_EBADF, /* 1503 */ + XML_IO_EBADMSG, /* 1504 */ + XML_IO_EBUSY, /* 1505 */ + XML_IO_ECANCELED, /* 1506 */ + XML_IO_ECHILD, /* 1507 */ + XML_IO_EDEADLK, /* 1508 */ + XML_IO_EDOM, /* 1509 */ + XML_IO_EEXIST, /* 1510 */ + XML_IO_EFAULT, /* 1511 */ + XML_IO_EFBIG, /* 1512 */ + XML_IO_EINPROGRESS, /* 1513 */ + XML_IO_EINTR, /* 1514 */ + XML_IO_EINVAL, /* 1515 */ + XML_IO_EIO, /* 1516 */ + XML_IO_EISDIR, /* 1517 */ + XML_IO_EMFILE, /* 1518 */ + XML_IO_EMLINK, /* 1519 */ + XML_IO_EMSGSIZE, /* 1520 */ + XML_IO_ENAMETOOLONG, /* 1521 */ + XML_IO_ENFILE, /* 1522 */ + XML_IO_ENODEV, /* 1523 */ + XML_IO_ENOENT, /* 1524 */ + XML_IO_ENOEXEC, /* 1525 */ + XML_IO_ENOLCK, /* 1526 */ + XML_IO_ENOMEM, /* 1527 */ + XML_IO_ENOSPC, /* 1528 */ + XML_IO_ENOSYS, /* 1529 */ + XML_IO_ENOTDIR, /* 1530 */ + XML_IO_ENOTEMPTY, /* 1531 */ + XML_IO_ENOTSUP, /* 1532 */ + XML_IO_ENOTTY, /* 1533 */ + XML_IO_ENXIO, /* 1534 */ + XML_IO_EPERM, /* 1535 */ + XML_IO_EPIPE, /* 1536 */ + XML_IO_ERANGE, /* 1537 */ + XML_IO_EROFS, /* 1538 */ + XML_IO_ESPIPE, /* 1539 */ + XML_IO_ESRCH, /* 1540 */ + XML_IO_ETIMEDOUT, /* 1541 */ + XML_IO_EXDEV, /* 1542 */ + XML_IO_NETWORK_ATTEMPT, /* 1543 */ + XML_IO_ENCODER, /* 1544 */ + XML_IO_FLUSH, /* 1545 */ + XML_IO_WRITE, /* 1546 */ + XML_IO_NO_INPUT, /* 1547 */ + XML_IO_BUFFER_FULL, /* 1548 */ + XML_IO_LOAD_ERROR, /* 1549 */ + XML_IO_ENOTSOCK, /* 1550 */ + XML_IO_EISCONN, /* 1551 */ + XML_IO_ECONNREFUSED, /* 1552 */ + XML_IO_ENETUNREACH, /* 1553 */ + XML_IO_EADDRINUSE, /* 1554 */ + XML_IO_EALREADY, /* 1555 */ + XML_IO_EAFNOSUPPORT, /* 1556 */ + XML_XINCLUDE_RECURSION=1600, + XML_XINCLUDE_PARSE_VALUE, /* 1601 */ + XML_XINCLUDE_ENTITY_DEF_MISMATCH, /* 1602 */ + XML_XINCLUDE_NO_HREF, /* 1603 */ + XML_XINCLUDE_NO_FALLBACK, /* 1604 */ + XML_XINCLUDE_HREF_URI, /* 1605 */ + XML_XINCLUDE_TEXT_FRAGMENT, /* 1606 */ + XML_XINCLUDE_TEXT_DOCUMENT, /* 1607 */ + XML_XINCLUDE_INVALID_CHAR, /* 1608 */ + XML_XINCLUDE_BUILD_FAILED, /* 1609 */ + XML_XINCLUDE_UNKNOWN_ENCODING, /* 1610 */ + XML_XINCLUDE_MULTIPLE_ROOT, /* 1611 */ + XML_XINCLUDE_XPTR_FAILED, /* 1612 */ + XML_XINCLUDE_XPTR_RESULT, /* 1613 */ + XML_XINCLUDE_INCLUDE_IN_INCLUDE, /* 1614 */ + XML_XINCLUDE_FALLBACKS_IN_INCLUDE, /* 1615 */ + XML_XINCLUDE_FALLBACK_NOT_IN_INCLUDE, /* 1616 */ + XML_XINCLUDE_DEPRECATED_NS, /* 1617 */ + XML_XINCLUDE_FRAGMENT_ID, /* 1618 */ + XML_CATALOG_MISSING_ATTR = 1650, + XML_CATALOG_ENTRY_BROKEN, /* 1651 */ + XML_CATALOG_PREFER_VALUE, /* 1652 */ + XML_CATALOG_NOT_CATALOG, /* 1653 */ + XML_CATALOG_RECURSION, /* 1654 */ + XML_SCHEMAP_PREFIX_UNDEFINED = 1700, + XML_SCHEMAP_ATTRFORMDEFAULT_VALUE, /* 1701 */ + XML_SCHEMAP_ATTRGRP_NONAME_NOREF, /* 1702 */ + XML_SCHEMAP_ATTR_NONAME_NOREF, /* 1703 */ + XML_SCHEMAP_COMPLEXTYPE_NONAME_NOREF, /* 1704 */ + XML_SCHEMAP_ELEMFORMDEFAULT_VALUE, /* 1705 */ + XML_SCHEMAP_ELEM_NONAME_NOREF, /* 1706 */ + XML_SCHEMAP_EXTENSION_NO_BASE, /* 1707 */ + XML_SCHEMAP_FACET_NO_VALUE, /* 1708 */ + XML_SCHEMAP_FAILED_BUILD_IMPORT, /* 1709 */ + XML_SCHEMAP_GROUP_NONAME_NOREF, /* 1710 */ + XML_SCHEMAP_IMPORT_NAMESPACE_NOT_URI, /* 1711 */ + XML_SCHEMAP_IMPORT_REDEFINE_NSNAME, /* 1712 */ + XML_SCHEMAP_IMPORT_SCHEMA_NOT_URI, /* 1713 */ + XML_SCHEMAP_INVALID_BOOLEAN, /* 1714 */ + XML_SCHEMAP_INVALID_ENUM, /* 1715 */ + XML_SCHEMAP_INVALID_FACET, /* 1716 */ + XML_SCHEMAP_INVALID_FACET_VALUE, /* 1717 */ + XML_SCHEMAP_INVALID_MAXOCCURS, /* 1718 */ + XML_SCHEMAP_INVALID_MINOCCURS, /* 1719 */ + XML_SCHEMAP_INVALID_REF_AND_SUBTYPE, /* 1720 */ + XML_SCHEMAP_INVALID_WHITE_SPACE, /* 1721 */ + XML_SCHEMAP_NOATTR_NOREF, /* 1722 */ + XML_SCHEMAP_NOTATION_NO_NAME, /* 1723 */ + XML_SCHEMAP_NOTYPE_NOREF, /* 1724 */ + XML_SCHEMAP_REF_AND_SUBTYPE, /* 1725 */ + XML_SCHEMAP_RESTRICTION_NONAME_NOREF, /* 1726 */ + XML_SCHEMAP_SIMPLETYPE_NONAME, /* 1727 */ + XML_SCHEMAP_TYPE_AND_SUBTYPE, /* 1728 */ + XML_SCHEMAP_UNKNOWN_ALL_CHILD, /* 1729 */ + XML_SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD, /* 1730 */ + XML_SCHEMAP_UNKNOWN_ATTR_CHILD, /* 1731 */ + XML_SCHEMAP_UNKNOWN_ATTRGRP_CHILD, /* 1732 */ + XML_SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP, /* 1733 */ + XML_SCHEMAP_UNKNOWN_BASE_TYPE, /* 1734 */ + XML_SCHEMAP_UNKNOWN_CHOICE_CHILD, /* 1735 */ + XML_SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD, /* 1736 */ + XML_SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD, /* 1737 */ + XML_SCHEMAP_UNKNOWN_ELEM_CHILD, /* 1738 */ + XML_SCHEMAP_UNKNOWN_EXTENSION_CHILD, /* 1739 */ + XML_SCHEMAP_UNKNOWN_FACET_CHILD, /* 1740 */ + XML_SCHEMAP_UNKNOWN_FACET_TYPE, /* 1741 */ + XML_SCHEMAP_UNKNOWN_GROUP_CHILD, /* 1742 */ + XML_SCHEMAP_UNKNOWN_IMPORT_CHILD, /* 1743 */ + XML_SCHEMAP_UNKNOWN_LIST_CHILD, /* 1744 */ + XML_SCHEMAP_UNKNOWN_NOTATION_CHILD, /* 1745 */ + XML_SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD, /* 1746 */ + XML_SCHEMAP_UNKNOWN_REF, /* 1747 */ + XML_SCHEMAP_UNKNOWN_RESTRICTION_CHILD, /* 1748 */ + XML_SCHEMAP_UNKNOWN_SCHEMAS_CHILD, /* 1749 */ + XML_SCHEMAP_UNKNOWN_SEQUENCE_CHILD, /* 1750 */ + XML_SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD, /* 1751 */ + XML_SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD, /* 1752 */ + XML_SCHEMAP_UNKNOWN_TYPE, /* 1753 */ + XML_SCHEMAP_UNKNOWN_UNION_CHILD, /* 1754 */ + XML_SCHEMAP_ELEM_DEFAULT_FIXED, /* 1755 */ + XML_SCHEMAP_REGEXP_INVALID, /* 1756 */ + XML_SCHEMAP_FAILED_LOAD, /* 1757 */ + XML_SCHEMAP_NOTHING_TO_PARSE, /* 1758 */ + XML_SCHEMAP_NOROOT, /* 1759 */ + XML_SCHEMAP_REDEFINED_GROUP, /* 1760 */ + XML_SCHEMAP_REDEFINED_TYPE, /* 1761 */ + XML_SCHEMAP_REDEFINED_ELEMENT, /* 1762 */ + XML_SCHEMAP_REDEFINED_ATTRGROUP, /* 1763 */ + XML_SCHEMAP_REDEFINED_ATTR, /* 1764 */ + XML_SCHEMAP_REDEFINED_NOTATION, /* 1765 */ + XML_SCHEMAP_FAILED_PARSE, /* 1766 */ + XML_SCHEMAP_UNKNOWN_PREFIX, /* 1767 */ + XML_SCHEMAP_DEF_AND_PREFIX, /* 1768 */ + XML_SCHEMAP_UNKNOWN_INCLUDE_CHILD, /* 1769 */ + XML_SCHEMAP_INCLUDE_SCHEMA_NOT_URI, /* 1770 */ + XML_SCHEMAP_INCLUDE_SCHEMA_NO_URI, /* 1771 */ + XML_SCHEMAP_NOT_SCHEMA, /* 1772 */ + XML_SCHEMAP_UNKNOWN_MEMBER_TYPE, /* 1773 */ + XML_SCHEMAP_INVALID_ATTR_USE, /* 1774 */ + XML_SCHEMAP_RECURSIVE, /* 1775 */ + XML_SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE, /* 1776 */ + XML_SCHEMAP_INVALID_ATTR_COMBINATION, /* 1777 */ + XML_SCHEMAP_INVALID_ATTR_INLINE_COMBINATION, /* 1778 */ + XML_SCHEMAP_MISSING_SIMPLETYPE_CHILD, /* 1779 */ + XML_SCHEMAP_INVALID_ATTR_NAME, /* 1780 */ + XML_SCHEMAP_REF_AND_CONTENT, /* 1781 */ + XML_SCHEMAP_CT_PROPS_CORRECT_1, /* 1782 */ + XML_SCHEMAP_CT_PROPS_CORRECT_2, /* 1783 */ + XML_SCHEMAP_CT_PROPS_CORRECT_3, /* 1784 */ + XML_SCHEMAP_CT_PROPS_CORRECT_4, /* 1785 */ + XML_SCHEMAP_CT_PROPS_CORRECT_5, /* 1786 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_1, /* 1787 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1, /* 1788 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2, /* 1789 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_2, /* 1790 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_3, /* 1791 */ + XML_SCHEMAP_WILDCARD_INVALID_NS_MEMBER, /* 1792 */ + XML_SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE, /* 1793 */ + XML_SCHEMAP_UNION_NOT_EXPRESSIBLE, /* 1794 */ + XML_SCHEMAP_SRC_IMPORT_3_1, /* 1795 */ + XML_SCHEMAP_SRC_IMPORT_3_2, /* 1796 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_1, /* 1797 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_2, /* 1798 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_3, /* 1799 */ + XML_SCHEMAP_COS_CT_EXTENDS_1_3, /* 1800 */ + XML_SCHEMAV_NOROOT = 1801, + XML_SCHEMAV_UNDECLAREDELEM, /* 1802 */ + XML_SCHEMAV_NOTTOPLEVEL, /* 1803 */ + XML_SCHEMAV_MISSING, /* 1804 */ + XML_SCHEMAV_WRONGELEM, /* 1805 */ + XML_SCHEMAV_NOTYPE, /* 1806 */ + XML_SCHEMAV_NOROLLBACK, /* 1807 */ + XML_SCHEMAV_ISABSTRACT, /* 1808 */ + XML_SCHEMAV_NOTEMPTY, /* 1809 */ + XML_SCHEMAV_ELEMCONT, /* 1810 */ + XML_SCHEMAV_HAVEDEFAULT, /* 1811 */ + XML_SCHEMAV_NOTNILLABLE, /* 1812 */ + XML_SCHEMAV_EXTRACONTENT, /* 1813 */ + XML_SCHEMAV_INVALIDATTR, /* 1814 */ + XML_SCHEMAV_INVALIDELEM, /* 1815 */ + XML_SCHEMAV_NOTDETERMINIST, /* 1816 */ + XML_SCHEMAV_CONSTRUCT, /* 1817 */ + XML_SCHEMAV_INTERNAL, /* 1818 */ + XML_SCHEMAV_NOTSIMPLE, /* 1819 */ + XML_SCHEMAV_ATTRUNKNOWN, /* 1820 */ + XML_SCHEMAV_ATTRINVALID, /* 1821 */ + XML_SCHEMAV_VALUE, /* 1822 */ + XML_SCHEMAV_FACET, /* 1823 */ + XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_1, /* 1824 */ + XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_2, /* 1825 */ + XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_3, /* 1826 */ + XML_SCHEMAV_CVC_TYPE_3_1_1, /* 1827 */ + XML_SCHEMAV_CVC_TYPE_3_1_2, /* 1828 */ + XML_SCHEMAV_CVC_FACET_VALID, /* 1829 */ + XML_SCHEMAV_CVC_LENGTH_VALID, /* 1830 */ + XML_SCHEMAV_CVC_MINLENGTH_VALID, /* 1831 */ + XML_SCHEMAV_CVC_MAXLENGTH_VALID, /* 1832 */ + XML_SCHEMAV_CVC_MININCLUSIVE_VALID, /* 1833 */ + XML_SCHEMAV_CVC_MAXINCLUSIVE_VALID, /* 1834 */ + XML_SCHEMAV_CVC_MINEXCLUSIVE_VALID, /* 1835 */ + XML_SCHEMAV_CVC_MAXEXCLUSIVE_VALID, /* 1836 */ + XML_SCHEMAV_CVC_TOTALDIGITS_VALID, /* 1837 */ + XML_SCHEMAV_CVC_FRACTIONDIGITS_VALID, /* 1838 */ + XML_SCHEMAV_CVC_PATTERN_VALID, /* 1839 */ + XML_SCHEMAV_CVC_ENUMERATION_VALID, /* 1840 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_1, /* 1841 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_2, /* 1842 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_3, /* 1843 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_4, /* 1844 */ + XML_SCHEMAV_CVC_ELT_1, /* 1845 */ + XML_SCHEMAV_CVC_ELT_2, /* 1846 */ + XML_SCHEMAV_CVC_ELT_3_1, /* 1847 */ + XML_SCHEMAV_CVC_ELT_3_2_1, /* 1848 */ + XML_SCHEMAV_CVC_ELT_3_2_2, /* 1849 */ + XML_SCHEMAV_CVC_ELT_4_1, /* 1850 */ + XML_SCHEMAV_CVC_ELT_4_2, /* 1851 */ + XML_SCHEMAV_CVC_ELT_4_3, /* 1852 */ + XML_SCHEMAV_CVC_ELT_5_1_1, /* 1853 */ + XML_SCHEMAV_CVC_ELT_5_1_2, /* 1854 */ + XML_SCHEMAV_CVC_ELT_5_2_1, /* 1855 */ + XML_SCHEMAV_CVC_ELT_5_2_2_1, /* 1856 */ + XML_SCHEMAV_CVC_ELT_5_2_2_2_1, /* 1857 */ + XML_SCHEMAV_CVC_ELT_5_2_2_2_2, /* 1858 */ + XML_SCHEMAV_CVC_ELT_6, /* 1859 */ + XML_SCHEMAV_CVC_ELT_7, /* 1860 */ + XML_SCHEMAV_CVC_ATTRIBUTE_1, /* 1861 */ + XML_SCHEMAV_CVC_ATTRIBUTE_2, /* 1862 */ + XML_SCHEMAV_CVC_ATTRIBUTE_3, /* 1863 */ + XML_SCHEMAV_CVC_ATTRIBUTE_4, /* 1864 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_3_1, /* 1865 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_1, /* 1866 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_2, /* 1867 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_4, /* 1868 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_5_1, /* 1869 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_5_2, /* 1870 */ + XML_SCHEMAV_ELEMENT_CONTENT, /* 1871 */ + XML_SCHEMAV_DOCUMENT_ELEMENT_MISSING, /* 1872 */ + XML_SCHEMAV_CVC_COMPLEX_TYPE_1, /* 1873 */ + XML_SCHEMAV_CVC_AU, /* 1874 */ + XML_SCHEMAV_CVC_TYPE_1, /* 1875 */ + XML_SCHEMAV_CVC_TYPE_2, /* 1876 */ + XML_SCHEMAV_CVC_IDC, /* 1877 */ + XML_SCHEMAV_CVC_WILDCARD, /* 1878 */ + XML_SCHEMAV_MISC, /* 1879 */ + XML_XPTR_UNKNOWN_SCHEME = 1900, + XML_XPTR_CHILDSEQ_START, /* 1901 */ + XML_XPTR_EVAL_FAILED, /* 1902 */ + XML_XPTR_EXTRA_OBJECTS, /* 1903 */ + XML_C14N_CREATE_CTXT = 1950, + XML_C14N_REQUIRES_UTF8, /* 1951 */ + XML_C14N_CREATE_STACK, /* 1952 */ + XML_C14N_INVALID_NODE, /* 1953 */ + XML_C14N_UNKNOW_NODE, /* 1954 */ + XML_C14N_RELATIVE_NAMESPACE, /* 1955 */ + XML_FTP_PASV_ANSWER = 2000, + XML_FTP_EPSV_ANSWER, /* 2001 */ + XML_FTP_ACCNT, /* 2002 */ + XML_FTP_URL_SYNTAX, /* 2003 */ + XML_HTTP_URL_SYNTAX = 2020, + XML_HTTP_USE_IP, /* 2021 */ + XML_HTTP_UNKNOWN_HOST, /* 2022 */ + XML_SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000, + XML_SCHEMAP_SRC_SIMPLE_TYPE_2, /* 3001 */ + XML_SCHEMAP_SRC_SIMPLE_TYPE_3, /* 3002 */ + XML_SCHEMAP_SRC_SIMPLE_TYPE_4, /* 3003 */ + XML_SCHEMAP_SRC_RESOLVE, /* 3004 */ + XML_SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE, /* 3005 */ + XML_SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE, /* 3006 */ + XML_SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES, /* 3007 */ + XML_SCHEMAP_ST_PROPS_CORRECT_1, /* 3008 */ + XML_SCHEMAP_ST_PROPS_CORRECT_2, /* 3009 */ + XML_SCHEMAP_ST_PROPS_CORRECT_3, /* 3010 */ + XML_SCHEMAP_COS_ST_RESTRICTS_1_1, /* 3011 */ + XML_SCHEMAP_COS_ST_RESTRICTS_1_2, /* 3012 */ + XML_SCHEMAP_COS_ST_RESTRICTS_1_3_1, /* 3013 */ + XML_SCHEMAP_COS_ST_RESTRICTS_1_3_2, /* 3014 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_1, /* 3015 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_1, /* 3016 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_2, /* 3017 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_1, /* 3018 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_2, /* 3019 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_3, /* 3020 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_4, /* 3021 */ + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_5, /* 3022 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_1, /* 3023 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1, /* 3024 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1_2, /* 3025 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_2, /* 3026 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_1, /* 3027 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_3, /* 3028 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_4, /* 3029 */ + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_5, /* 3030 */ + XML_SCHEMAP_COS_ST_DERIVED_OK_2_1, /* 3031 */ + XML_SCHEMAP_COS_ST_DERIVED_OK_2_2, /* 3032 */ + XML_SCHEMAP_S4S_ELEM_NOT_ALLOWED, /* 3033 */ + XML_SCHEMAP_S4S_ELEM_MISSING, /* 3034 */ + XML_SCHEMAP_S4S_ATTR_NOT_ALLOWED, /* 3035 */ + XML_SCHEMAP_S4S_ATTR_MISSING, /* 3036 */ + XML_SCHEMAP_S4S_ATTR_INVALID_VALUE, /* 3037 */ + XML_SCHEMAP_SRC_ELEMENT_1, /* 3038 */ + XML_SCHEMAP_SRC_ELEMENT_2_1, /* 3039 */ + XML_SCHEMAP_SRC_ELEMENT_2_2, /* 3040 */ + XML_SCHEMAP_SRC_ELEMENT_3, /* 3041 */ + XML_SCHEMAP_P_PROPS_CORRECT_1, /* 3042 */ + XML_SCHEMAP_P_PROPS_CORRECT_2_1, /* 3043 */ + XML_SCHEMAP_P_PROPS_CORRECT_2_2, /* 3044 */ + XML_SCHEMAP_E_PROPS_CORRECT_2, /* 3045 */ + XML_SCHEMAP_E_PROPS_CORRECT_3, /* 3046 */ + XML_SCHEMAP_E_PROPS_CORRECT_4, /* 3047 */ + XML_SCHEMAP_E_PROPS_CORRECT_5, /* 3048 */ + XML_SCHEMAP_E_PROPS_CORRECT_6, /* 3049 */ + XML_SCHEMAP_SRC_INCLUDE, /* 3050 */ + XML_SCHEMAP_SRC_ATTRIBUTE_1, /* 3051 */ + XML_SCHEMAP_SRC_ATTRIBUTE_2, /* 3052 */ + XML_SCHEMAP_SRC_ATTRIBUTE_3_1, /* 3053 */ + XML_SCHEMAP_SRC_ATTRIBUTE_3_2, /* 3054 */ + XML_SCHEMAP_SRC_ATTRIBUTE_4, /* 3055 */ + XML_SCHEMAP_NO_XMLNS, /* 3056 */ + XML_SCHEMAP_NO_XSI, /* 3057 */ + XML_SCHEMAP_COS_VALID_DEFAULT_1, /* 3058 */ + XML_SCHEMAP_COS_VALID_DEFAULT_2_1, /* 3059 */ + XML_SCHEMAP_COS_VALID_DEFAULT_2_2_1, /* 3060 */ + XML_SCHEMAP_COS_VALID_DEFAULT_2_2_2, /* 3061 */ + XML_SCHEMAP_CVC_SIMPLE_TYPE, /* 3062 */ + XML_SCHEMAP_COS_CT_EXTENDS_1_1, /* 3063 */ + XML_SCHEMAP_SRC_IMPORT_1_1, /* 3064 */ + XML_SCHEMAP_SRC_IMPORT_1_2, /* 3065 */ + XML_SCHEMAP_SRC_IMPORT_2, /* 3066 */ + XML_SCHEMAP_SRC_IMPORT_2_1, /* 3067 */ + XML_SCHEMAP_SRC_IMPORT_2_2, /* 3068 */ + XML_SCHEMAP_INTERNAL, /* 3069 non-W3C */ + XML_SCHEMAP_NOT_DETERMINISTIC, /* 3070 non-W3C */ + XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_1, /* 3071 */ + XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_2, /* 3072 */ + XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_3, /* 3073 */ + XML_SCHEMAP_MG_PROPS_CORRECT_1, /* 3074 */ + XML_SCHEMAP_MG_PROPS_CORRECT_2, /* 3075 */ + XML_SCHEMAP_SRC_CT_1, /* 3076 */ + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3, /* 3077 */ + XML_SCHEMAP_AU_PROPS_CORRECT_2, /* 3078 */ + XML_SCHEMAP_A_PROPS_CORRECT_2, /* 3079 */ + XML_SCHEMAP_C_PROPS_CORRECT, /* 3080 */ + XML_SCHEMAP_SRC_REDEFINE, /* 3081 */ + XML_SCHEMAP_SRC_IMPORT, /* 3082 */ + XML_SCHEMAP_WARN_SKIP_SCHEMA, /* 3083 */ + XML_SCHEMAP_WARN_UNLOCATED_SCHEMA, /* 3084 */ + XML_SCHEMAP_WARN_ATTR_REDECL_PROH, /* 3085 */ + XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH, /* 3085 */ + XML_SCHEMAP_AG_PROPS_CORRECT, /* 3086 */ + XML_SCHEMAP_COS_CT_EXTENDS_1_2, /* 3087 */ + XML_SCHEMAP_AU_PROPS_CORRECT, /* 3088 */ + XML_SCHEMAP_A_PROPS_CORRECT_3, /* 3089 */ + XML_SCHEMAP_COS_ALL_LIMITED, /* 3090 */ + XML_SCHEMATRONV_ASSERT = 4000, /* 4000 */ + XML_SCHEMATRONV_REPORT, + XML_MODULE_OPEN = 4900, /* 4900 */ + XML_MODULE_CLOSE, /* 4901 */ + XML_CHECK_FOUND_ELEMENT = 5000, + XML_CHECK_FOUND_ATTRIBUTE, /* 5001 */ + XML_CHECK_FOUND_TEXT, /* 5002 */ + XML_CHECK_FOUND_CDATA, /* 5003 */ + XML_CHECK_FOUND_ENTITYREF, /* 5004 */ + XML_CHECK_FOUND_ENTITY, /* 5005 */ + XML_CHECK_FOUND_PI, /* 5006 */ + XML_CHECK_FOUND_COMMENT, /* 5007 */ + XML_CHECK_FOUND_DOCTYPE, /* 5008 */ + XML_CHECK_FOUND_FRAGMENT, /* 5009 */ + XML_CHECK_FOUND_NOTATION, /* 5010 */ + XML_CHECK_UNKNOWN_NODE, /* 5011 */ + XML_CHECK_ENTITY_TYPE, /* 5012 */ + XML_CHECK_NO_PARENT, /* 5013 */ + XML_CHECK_NO_DOC, /* 5014 */ + XML_CHECK_NO_NAME, /* 5015 */ + XML_CHECK_NO_ELEM, /* 5016 */ + XML_CHECK_WRONG_DOC, /* 5017 */ + XML_CHECK_NO_PREV, /* 5018 */ + XML_CHECK_WRONG_PREV, /* 5019 */ + XML_CHECK_NO_NEXT, /* 5020 */ + XML_CHECK_WRONG_NEXT, /* 5021 */ + XML_CHECK_NOT_DTD, /* 5022 */ + XML_CHECK_NOT_ATTR, /* 5023 */ + XML_CHECK_NOT_ATTR_DECL, /* 5024 */ + XML_CHECK_NOT_ELEM_DECL, /* 5025 */ + XML_CHECK_NOT_ENTITY_DECL, /* 5026 */ + XML_CHECK_NOT_NS_DECL, /* 5027 */ + XML_CHECK_NO_HREF, /* 5028 */ + XML_CHECK_WRONG_PARENT,/* 5029 */ + XML_CHECK_NS_SCOPE, /* 5030 */ + XML_CHECK_NS_ANCESTOR, /* 5031 */ + XML_CHECK_NOT_UTF8, /* 5032 */ + XML_CHECK_NO_DICT, /* 5033 */ + XML_CHECK_NOT_NCNAME, /* 5034 */ + XML_CHECK_OUTSIDE_DICT, /* 5035 */ + XML_CHECK_WRONG_NAME, /* 5036 */ + XML_CHECK_NAME_NOT_NULL, /* 5037 */ + XML_I18N_NO_NAME = 6000, + XML_I18N_NO_HANDLER, /* 6001 */ + XML_I18N_EXCESS_HANDLER, /* 6002 */ + XML_I18N_CONV_FAILED, /* 6003 */ + XML_I18N_NO_OUTPUT, /* 6004 */ + XML_BUF_OVERFLOW = 7000 +} xmlParserErrors; + +/** + * xmlGenericErrorFunc: + * @ctx: a parsing context + * @msg: the message + * @...: the extra arguments of the varargs to format the message + * + * Signature of the function to use when there is an error and + * no parsing or validity context available . + */ +typedef void (*xmlGenericErrorFunc) (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); +/** + * xmlStructuredErrorFunc: + * @userData: user provided data for the error callback + * @error: the error being raised. + * + * Signature of the function to use when there is an error and + * the module handles the new error reporting mechanism. + */ +typedef void (*xmlStructuredErrorFunc) (void *userData, const xmlError *error); + +/** DOC_DISABLE */ +#define XML_GLOBALS_ERROR \ + XML_OP(xmlLastError, xmlError, XML_DEPRECATED) \ + XML_OP(xmlGenericError, xmlGenericErrorFunc, XML_NO_ATTR) \ + XML_OP(xmlGenericErrorContext, void *, XML_NO_ATTR) \ + XML_OP(xmlStructuredError, xmlStructuredErrorFunc, XML_NO_ATTR) \ + XML_OP(xmlStructuredErrorContext, void *, XML_NO_ATTR) + +#define XML_OP XML_DECLARE_GLOBAL +XML_GLOBALS_ERROR +#undef XML_OP + +#if defined(LIBXML_THREAD_ENABLED) && !defined(XML_GLOBALS_NO_REDEFINITION) + #define xmlLastError XML_GLOBAL_MACRO(xmlLastError) + #define xmlGenericError XML_GLOBAL_MACRO(xmlGenericError) + #define xmlGenericErrorContext XML_GLOBAL_MACRO(xmlGenericErrorContext) + #define xmlStructuredError XML_GLOBAL_MACRO(xmlStructuredError) + #define xmlStructuredErrorContext XML_GLOBAL_MACRO(xmlStructuredErrorContext) +#endif +/** DOC_ENABLE */ + +/* + * Use the following function to reset the two global variables + * xmlGenericError and xmlGenericErrorContext. + */ +XMLPUBFUN void + xmlSetGenericErrorFunc (void *ctx, + xmlGenericErrorFunc handler); +XMLPUBFUN void + xmlThrDefSetGenericErrorFunc(void *ctx, + xmlGenericErrorFunc handler); +XML_DEPRECATED +XMLPUBFUN void + initGenericErrorDefaultFunc (xmlGenericErrorFunc *handler); + +XMLPUBFUN void + xmlSetStructuredErrorFunc (void *ctx, + xmlStructuredErrorFunc handler); +XMLPUBFUN void + xmlThrDefSetStructuredErrorFunc(void *ctx, + xmlStructuredErrorFunc handler); +/* + * Default message routines used by SAX and Valid context for error + * and warning reporting. + */ +XMLPUBFUN void + xmlParserError (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); +XMLPUBFUN void + xmlParserWarning (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); +XMLPUBFUN void + xmlParserValidityError (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); +XMLPUBFUN void + xmlParserValidityWarning (void *ctx, + const char *msg, + ...) LIBXML_ATTR_FORMAT(2,3); +struct _xmlParserInput; +XMLPUBFUN void + xmlParserPrintFileInfo (struct _xmlParserInput *input); +XMLPUBFUN void + xmlParserPrintFileContext (struct _xmlParserInput *input); + +/* + * Extended error information routines + */ +XMLPUBFUN const xmlError * + xmlGetLastError (void); +XMLPUBFUN void + xmlResetLastError (void); +XMLPUBFUN const xmlError * + xmlCtxtGetLastError (void *ctx); +XMLPUBFUN void + xmlCtxtResetLastError (void *ctx); +XMLPUBFUN void + xmlResetError (xmlErrorPtr err); +XMLPUBFUN int + xmlCopyError (const xmlError *from, + xmlErrorPtr to); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_ERROR_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlexports.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlexports.h new file mode 100644 index 00000000..3b063e7d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlexports.h @@ -0,0 +1,50 @@ +/* + * Summary: macros for marking symbols as exportable/importable. + * Description: macros for marking symbols as exportable/importable. + * + * Copy: See Copyright for the status of this software. + */ + +#ifndef __XML_EXPORTS_H__ +#define __XML_EXPORTS_H__ + +/** DOC_DISABLE */ +#if defined(_WIN32) || defined(__CYGWIN__) + #ifdef LIBXML_STATIC + #define XMLPUBLIC + #elif defined(IN_LIBXML) + #define XMLPUBLIC __declspec(dllexport) + #else + #define XMLPUBLIC __declspec(dllimport) + #endif +#else /* not Windows */ + #define XMLPUBLIC +#endif /* platform switch */ +/** DOC_ENABLE */ + +/* + * XMLPUBFUN: + * + * Macro which declares an exportable function + */ +#define XMLPUBFUN XMLPUBLIC + +/** + * XMLPUBVAR: + * + * Macro which declares an exportable variable + */ +#define XMLPUBVAR XMLPUBLIC extern + +/** DOC_DISABLE */ +/* Compatibility */ +#define XMLCALL +#define XMLCDECL +#if !defined(LIBXML_DLL_IMPORT) +#define LIBXML_DLL_IMPORT XMLPUBVAR +#endif +/** DOC_ENABLE */ + +#endif /* __XML_EXPORTS_H__ */ + + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlmemory.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlmemory.h new file mode 100644 index 00000000..097e3c8f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlmemory.h @@ -0,0 +1,225 @@ +/* + * Summary: interface for the memory allocator + * Description: provides interfaces for the memory allocator, + * including debugging capabilities. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __DEBUG_MEMORY_ALLOC__ +#define __DEBUG_MEMORY_ALLOC__ + +#include <stdio.h> +#include <libxml/xmlversion.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * The XML memory wrapper support 4 basic overloadable functions. + */ +/** + * xmlFreeFunc: + * @mem: an already allocated block of memory + * + * Signature for a free() implementation. + */ +typedef void (*xmlFreeFunc)(void *mem); +/** + * xmlMallocFunc: + * @size: the size requested in bytes + * + * Signature for a malloc() implementation. + * + * Returns a pointer to the newly allocated block or NULL in case of error. + */ +typedef void *(LIBXML_ATTR_ALLOC_SIZE(1) *xmlMallocFunc)(size_t size); + +/** + * xmlReallocFunc: + * @mem: an already allocated block of memory + * @size: the new size requested in bytes + * + * Signature for a realloc() implementation. + * + * Returns a pointer to the newly reallocated block or NULL in case of error. + */ +typedef void *(*xmlReallocFunc)(void *mem, size_t size); + +/** + * xmlStrdupFunc: + * @str: a zero terminated string + * + * Signature for an strdup() implementation. + * + * Returns the copy of the string or NULL in case of error. + */ +typedef char *(*xmlStrdupFunc)(const char *str); + +/* + * In general the memory allocation entry points are not kept + * thread specific but this can be overridden by LIBXML_THREAD_ALLOC_ENABLED + * - xmlMalloc + * - xmlMallocAtomic + * - xmlRealloc + * - xmlMemStrdup + * - xmlFree + */ +/** DOC_DISABLE */ +#ifdef LIBXML_THREAD_ALLOC_ENABLED + #define XML_GLOBALS_ALLOC \ + XML_OP(xmlMalloc, xmlMallocFunc, XML_NO_ATTR) \ + XML_OP(xmlMallocAtomic, xmlMallocFunc, XML_NO_ATTR) \ + XML_OP(xmlRealloc, xmlReallocFunc, XML_NO_ATTR) \ + XML_OP(xmlFree, xmlFreeFunc, XML_NO_ATTR) \ + XML_OP(xmlMemStrdup, xmlStrdupFunc, XML_NO_ATTR) + #define XML_OP XML_DECLARE_GLOBAL + XML_GLOBALS_ALLOC + #undef XML_OP + #if defined(LIBXML_THREAD_ENABLED) && !defined(XML_GLOBALS_NO_REDEFINITION) + #define xmlMalloc XML_GLOBAL_MACRO(xmlMalloc) + #define xmlMallocAtomic XML_GLOBAL_MACRO(xmlMallocAtomic) + #define xmlRealloc XML_GLOBAL_MACRO(xmlRealloc) + #define xmlFree XML_GLOBAL_MACRO(xmlFree) + #define xmlMemStrdup XML_GLOBAL_MACRO(xmlMemStrdup) + #endif +#else + #define XML_GLOBALS_ALLOC +/** DOC_ENABLE */ + XMLPUBVAR xmlMallocFunc xmlMalloc; + XMLPUBVAR xmlMallocFunc xmlMallocAtomic; + XMLPUBVAR xmlReallocFunc xmlRealloc; + XMLPUBVAR xmlFreeFunc xmlFree; + XMLPUBVAR xmlStrdupFunc xmlMemStrdup; +#endif + +/* + * The way to overload the existing functions. + * The xmlGc function have an extra entry for atomic block + * allocations useful for garbage collected memory allocators + */ +XMLPUBFUN int + xmlMemSetup (xmlFreeFunc freeFunc, + xmlMallocFunc mallocFunc, + xmlReallocFunc reallocFunc, + xmlStrdupFunc strdupFunc); +XMLPUBFUN int + xmlMemGet (xmlFreeFunc *freeFunc, + xmlMallocFunc *mallocFunc, + xmlReallocFunc *reallocFunc, + xmlStrdupFunc *strdupFunc); +XMLPUBFUN int + xmlGcMemSetup (xmlFreeFunc freeFunc, + xmlMallocFunc mallocFunc, + xmlMallocFunc mallocAtomicFunc, + xmlReallocFunc reallocFunc, + xmlStrdupFunc strdupFunc); +XMLPUBFUN int + xmlGcMemGet (xmlFreeFunc *freeFunc, + xmlMallocFunc *mallocFunc, + xmlMallocFunc *mallocAtomicFunc, + xmlReallocFunc *reallocFunc, + xmlStrdupFunc *strdupFunc); + +/* + * Initialization of the memory layer. + */ +XML_DEPRECATED +XMLPUBFUN int + xmlInitMemory (void); + +/* + * Cleanup of the memory layer. + */ +XML_DEPRECATED +XMLPUBFUN void + xmlCleanupMemory (void); +/* + * These are specific to the XML debug memory wrapper. + */ +XMLPUBFUN size_t + xmlMemSize (void *ptr); +XMLPUBFUN int + xmlMemUsed (void); +XMLPUBFUN int + xmlMemBlocks (void); +XMLPUBFUN void + xmlMemDisplay (FILE *fp); +XMLPUBFUN void + xmlMemDisplayLast(FILE *fp, long nbBytes); +XMLPUBFUN void + xmlMemShow (FILE *fp, int nr); +XMLPUBFUN void + xmlMemoryDump (void); +XMLPUBFUN void * + xmlMemMalloc (size_t size) LIBXML_ATTR_ALLOC_SIZE(1); +XMLPUBFUN void * + xmlMemRealloc (void *ptr,size_t size); +XMLPUBFUN void + xmlMemFree (void *ptr); +XMLPUBFUN char * + xmlMemoryStrdup (const char *str); +XMLPUBFUN void * + xmlMallocLoc (size_t size, const char *file, int line) LIBXML_ATTR_ALLOC_SIZE(1); +XMLPUBFUN void * + xmlReallocLoc (void *ptr, size_t size, const char *file, int line); +XMLPUBFUN void * + xmlMallocAtomicLoc (size_t size, const char *file, int line) LIBXML_ATTR_ALLOC_SIZE(1); +XMLPUBFUN char * + xmlMemStrdupLoc (const char *str, const char *file, int line); + + +/** DOC_DISABLE */ +#ifdef DEBUG_MEMORY_LOCATION +/** + * xmlMalloc: + * @size: number of bytes to allocate + * + * Wrapper for the malloc() function used in the XML library. + * + * Returns the pointer to the allocated area or NULL in case of error. + */ +#define xmlMalloc(size) xmlMallocLoc((size), __FILE__, __LINE__) +/** + * xmlMallocAtomic: + * @size: number of bytes to allocate + * + * Wrapper for the malloc() function used in the XML library for allocation + * of block not containing pointers to other areas. + * + * Returns the pointer to the allocated area or NULL in case of error. + */ +#define xmlMallocAtomic(size) xmlMallocAtomicLoc((size), __FILE__, __LINE__) +/** + * xmlRealloc: + * @ptr: pointer to the existing allocated area + * @size: number of bytes to allocate + * + * Wrapper for the realloc() function used in the XML library. + * + * Returns the pointer to the allocated area or NULL in case of error. + */ +#define xmlRealloc(ptr, size) xmlReallocLoc((ptr), (size), __FILE__, __LINE__) +/** + * xmlMemStrdup: + * @str: pointer to the existing string + * + * Wrapper for the strdup() function, xmlStrdup() is usually preferred. + * + * Returns the pointer to the allocated area or NULL in case of error. + */ +#define xmlMemStrdup(str) xmlMemStrdupLoc((str), __FILE__, __LINE__) + +#endif /* DEBUG_MEMORY_LOCATION */ +/** DOC_ENABLE */ + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* __DEBUG_MEMORY_ALLOC__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlmodule.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlmodule.h new file mode 100644 index 00000000..279986c1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlmodule.h @@ -0,0 +1,57 @@ +/* + * Summary: dynamic module loading + * Description: basic API for dynamic module loading, used by + * libexslt added in 2.6.17 + * + * Copy: See Copyright for the status of this software. + * + * Author: Joel W. Reed + */ + +#ifndef __XML_MODULE_H__ +#define __XML_MODULE_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_MODULES_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlModulePtr: + * + * A handle to a dynamically loaded module + */ +typedef struct _xmlModule xmlModule; +typedef xmlModule *xmlModulePtr; + +/** + * xmlModuleOption: + * + * enumeration of options that can be passed down to xmlModuleOpen() + */ +typedef enum { + XML_MODULE_LAZY = 1, /* lazy binding */ + XML_MODULE_LOCAL= 2 /* local binding */ +} xmlModuleOption; + +XMLPUBFUN xmlModulePtr xmlModuleOpen (const char *filename, + int options); + +XMLPUBFUN int xmlModuleSymbol (xmlModulePtr module, + const char* name, + void **result); + +XMLPUBFUN int xmlModuleClose (xmlModulePtr module); + +XMLPUBFUN int xmlModuleFree (xmlModulePtr module); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_MODULES_ENABLED */ + +#endif /*__XML_MODULE_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlreader.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlreader.h new file mode 100644 index 00000000..b9f69897 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlreader.h @@ -0,0 +1,434 @@ +/* + * Summary: the XMLReader implementation + * Description: API of the XML streaming API based on C# interfaces. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XMLREADER_H__ +#define __XML_XMLREADER_H__ + +#include <libxml/xmlversion.h> +#include <libxml/tree.h> +#include <libxml/xmlerror.h> +#include <libxml/xmlIO.h> +#ifdef LIBXML_SCHEMAS_ENABLED +#include <libxml/relaxng.h> +#include <libxml/xmlschemas.h> +#endif +/* for compatibility */ +#include <libxml/parser.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlParserSeverities: + * + * How severe an error callback is when the per-reader error callback API + * is used. + */ +typedef enum { + XML_PARSER_SEVERITY_VALIDITY_WARNING = 1, + XML_PARSER_SEVERITY_VALIDITY_ERROR = 2, + XML_PARSER_SEVERITY_WARNING = 3, + XML_PARSER_SEVERITY_ERROR = 4 +} xmlParserSeverities; + +#ifdef LIBXML_READER_ENABLED + +/** + * xmlTextReaderMode: + * + * Internal state values for the reader. + */ +typedef enum { + XML_TEXTREADER_MODE_INITIAL = 0, + XML_TEXTREADER_MODE_INTERACTIVE = 1, + XML_TEXTREADER_MODE_ERROR = 2, + XML_TEXTREADER_MODE_EOF =3, + XML_TEXTREADER_MODE_CLOSED = 4, + XML_TEXTREADER_MODE_READING = 5 +} xmlTextReaderMode; + +/** + * xmlParserProperties: + * + * Some common options to use with xmlTextReaderSetParserProp, but it + * is better to use xmlParserOption and the xmlReaderNewxxx and + * xmlReaderForxxx APIs now. + */ +typedef enum { + XML_PARSER_LOADDTD = 1, + XML_PARSER_DEFAULTATTRS = 2, + XML_PARSER_VALIDATE = 3, + XML_PARSER_SUBST_ENTITIES = 4 +} xmlParserProperties; + +/** + * xmlReaderTypes: + * + * Predefined constants for the different types of nodes. + */ +typedef enum { + XML_READER_TYPE_NONE = 0, + XML_READER_TYPE_ELEMENT = 1, + XML_READER_TYPE_ATTRIBUTE = 2, + XML_READER_TYPE_TEXT = 3, + XML_READER_TYPE_CDATA = 4, + XML_READER_TYPE_ENTITY_REFERENCE = 5, + XML_READER_TYPE_ENTITY = 6, + XML_READER_TYPE_PROCESSING_INSTRUCTION = 7, + XML_READER_TYPE_COMMENT = 8, + XML_READER_TYPE_DOCUMENT = 9, + XML_READER_TYPE_DOCUMENT_TYPE = 10, + XML_READER_TYPE_DOCUMENT_FRAGMENT = 11, + XML_READER_TYPE_NOTATION = 12, + XML_READER_TYPE_WHITESPACE = 13, + XML_READER_TYPE_SIGNIFICANT_WHITESPACE = 14, + XML_READER_TYPE_END_ELEMENT = 15, + XML_READER_TYPE_END_ENTITY = 16, + XML_READER_TYPE_XML_DECLARATION = 17 +} xmlReaderTypes; + +/** + * xmlTextReader: + * + * Structure for an xmlReader context. + */ +typedef struct _xmlTextReader xmlTextReader; + +/** + * xmlTextReaderPtr: + * + * Pointer to an xmlReader context. + */ +typedef xmlTextReader *xmlTextReaderPtr; + +/* + * Constructors & Destructor + */ +XMLPUBFUN xmlTextReaderPtr + xmlNewTextReader (xmlParserInputBufferPtr input, + const char *URI); +XMLPUBFUN xmlTextReaderPtr + xmlNewTextReaderFilename(const char *URI); + +XMLPUBFUN void + xmlFreeTextReader (xmlTextReaderPtr reader); + +XMLPUBFUN int + xmlTextReaderSetup(xmlTextReaderPtr reader, + xmlParserInputBufferPtr input, const char *URL, + const char *encoding, int options); +XMLPUBFUN void + xmlTextReaderSetMaxAmplification(xmlTextReaderPtr reader, + unsigned maxAmpl); + +/* + * Iterators + */ +XMLPUBFUN int + xmlTextReaderRead (xmlTextReaderPtr reader); + +#ifdef LIBXML_WRITER_ENABLED +XMLPUBFUN xmlChar * + xmlTextReaderReadInnerXml(xmlTextReaderPtr reader); + +XMLPUBFUN xmlChar * + xmlTextReaderReadOuterXml(xmlTextReaderPtr reader); +#endif + +XMLPUBFUN xmlChar * + xmlTextReaderReadString (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderReadAttributeValue(xmlTextReaderPtr reader); + +/* + * Attributes of the node + */ +XMLPUBFUN int + xmlTextReaderAttributeCount(xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderDepth (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderHasAttributes(xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderHasValue(xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderIsDefault (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderIsEmptyElement(xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderNodeType (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderQuoteChar (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderReadState (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderIsNamespaceDecl(xmlTextReaderPtr reader); + +XMLPUBFUN const xmlChar * + xmlTextReaderConstBaseUri (xmlTextReaderPtr reader); +XMLPUBFUN const xmlChar * + xmlTextReaderConstLocalName (xmlTextReaderPtr reader); +XMLPUBFUN const xmlChar * + xmlTextReaderConstName (xmlTextReaderPtr reader); +XMLPUBFUN const xmlChar * + xmlTextReaderConstNamespaceUri(xmlTextReaderPtr reader); +XMLPUBFUN const xmlChar * + xmlTextReaderConstPrefix (xmlTextReaderPtr reader); +XMLPUBFUN const xmlChar * + xmlTextReaderConstXmlLang (xmlTextReaderPtr reader); +XMLPUBFUN const xmlChar * + xmlTextReaderConstString (xmlTextReaderPtr reader, + const xmlChar *str); +XMLPUBFUN const xmlChar * + xmlTextReaderConstValue (xmlTextReaderPtr reader); + +/* + * use the Const version of the routine for + * better performance and simpler code + */ +XMLPUBFUN xmlChar * + xmlTextReaderBaseUri (xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderLocalName (xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderName (xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderNamespaceUri(xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderPrefix (xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderXmlLang (xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderValue (xmlTextReaderPtr reader); + +/* + * Methods of the XmlTextReader + */ +XMLPUBFUN int + xmlTextReaderClose (xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderGetAttributeNo (xmlTextReaderPtr reader, + int no); +XMLPUBFUN xmlChar * + xmlTextReaderGetAttribute (xmlTextReaderPtr reader, + const xmlChar *name); +XMLPUBFUN xmlChar * + xmlTextReaderGetAttributeNs (xmlTextReaderPtr reader, + const xmlChar *localName, + const xmlChar *namespaceURI); +XMLPUBFUN xmlParserInputBufferPtr + xmlTextReaderGetRemainder (xmlTextReaderPtr reader); +XMLPUBFUN xmlChar * + xmlTextReaderLookupNamespace(xmlTextReaderPtr reader, + const xmlChar *prefix); +XMLPUBFUN int + xmlTextReaderMoveToAttributeNo(xmlTextReaderPtr reader, + int no); +XMLPUBFUN int + xmlTextReaderMoveToAttribute(xmlTextReaderPtr reader, + const xmlChar *name); +XMLPUBFUN int + xmlTextReaderMoveToAttributeNs(xmlTextReaderPtr reader, + const xmlChar *localName, + const xmlChar *namespaceURI); +XMLPUBFUN int + xmlTextReaderMoveToFirstAttribute(xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderMoveToNextAttribute(xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderMoveToElement (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderNormalization (xmlTextReaderPtr reader); +XMLPUBFUN const xmlChar * + xmlTextReaderConstEncoding (xmlTextReaderPtr reader); + +/* + * Extensions + */ +XMLPUBFUN int + xmlTextReaderSetParserProp (xmlTextReaderPtr reader, + int prop, + int value); +XMLPUBFUN int + xmlTextReaderGetParserProp (xmlTextReaderPtr reader, + int prop); +XMLPUBFUN xmlNodePtr + xmlTextReaderCurrentNode (xmlTextReaderPtr reader); + +XMLPUBFUN int + xmlTextReaderGetParserLineNumber(xmlTextReaderPtr reader); + +XMLPUBFUN int + xmlTextReaderGetParserColumnNumber(xmlTextReaderPtr reader); + +XMLPUBFUN xmlNodePtr + xmlTextReaderPreserve (xmlTextReaderPtr reader); +#ifdef LIBXML_PATTERN_ENABLED +XMLPUBFUN int + xmlTextReaderPreservePattern(xmlTextReaderPtr reader, + const xmlChar *pattern, + const xmlChar **namespaces); +#endif /* LIBXML_PATTERN_ENABLED */ +XMLPUBFUN xmlDocPtr + xmlTextReaderCurrentDoc (xmlTextReaderPtr reader); +XMLPUBFUN xmlNodePtr + xmlTextReaderExpand (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderNext (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderNextSibling (xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderIsValid (xmlTextReaderPtr reader); +#ifdef LIBXML_SCHEMAS_ENABLED +XMLPUBFUN int + xmlTextReaderRelaxNGValidate(xmlTextReaderPtr reader, + const char *rng); +XMLPUBFUN int + xmlTextReaderRelaxNGValidateCtxt(xmlTextReaderPtr reader, + xmlRelaxNGValidCtxtPtr ctxt, + int options); + +XMLPUBFUN int + xmlTextReaderRelaxNGSetSchema(xmlTextReaderPtr reader, + xmlRelaxNGPtr schema); +XMLPUBFUN int + xmlTextReaderSchemaValidate (xmlTextReaderPtr reader, + const char *xsd); +XMLPUBFUN int + xmlTextReaderSchemaValidateCtxt(xmlTextReaderPtr reader, + xmlSchemaValidCtxtPtr ctxt, + int options); +XMLPUBFUN int + xmlTextReaderSetSchema (xmlTextReaderPtr reader, + xmlSchemaPtr schema); +#endif +XMLPUBFUN const xmlChar * + xmlTextReaderConstXmlVersion(xmlTextReaderPtr reader); +XMLPUBFUN int + xmlTextReaderStandalone (xmlTextReaderPtr reader); + + +/* + * Index lookup + */ +XMLPUBFUN long + xmlTextReaderByteConsumed (xmlTextReaderPtr reader); + +/* + * New more complete APIs for simpler creation and reuse of readers + */ +XMLPUBFUN xmlTextReaderPtr + xmlReaderWalker (xmlDocPtr doc); +XMLPUBFUN xmlTextReaderPtr + xmlReaderForDoc (const xmlChar * cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlTextReaderPtr + xmlReaderForFile (const char *filename, + const char *encoding, + int options); +XMLPUBFUN xmlTextReaderPtr + xmlReaderForMemory (const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlTextReaderPtr + xmlReaderForFd (int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN xmlTextReaderPtr + xmlReaderForIO (xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); + +XMLPUBFUN int + xmlReaderNewWalker (xmlTextReaderPtr reader, + xmlDocPtr doc); +XMLPUBFUN int + xmlReaderNewDoc (xmlTextReaderPtr reader, + const xmlChar * cur, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN int + xmlReaderNewFile (xmlTextReaderPtr reader, + const char *filename, + const char *encoding, + int options); +XMLPUBFUN int + xmlReaderNewMemory (xmlTextReaderPtr reader, + const char *buffer, + int size, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN int + xmlReaderNewFd (xmlTextReaderPtr reader, + int fd, + const char *URL, + const char *encoding, + int options); +XMLPUBFUN int + xmlReaderNewIO (xmlTextReaderPtr reader, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void *ioctx, + const char *URL, + const char *encoding, + int options); +/* + * Error handling extensions + */ +typedef void * xmlTextReaderLocatorPtr; + +/** + * xmlTextReaderErrorFunc: + * @arg: the user argument + * @msg: the message + * @severity: the severity of the error + * @locator: a locator indicating where the error occurred + * + * Signature of an error callback from a reader parser + */ +typedef void (*xmlTextReaderErrorFunc)(void *arg, + const char *msg, + xmlParserSeverities severity, + xmlTextReaderLocatorPtr locator); +XMLPUBFUN int + xmlTextReaderLocatorLineNumber(xmlTextReaderLocatorPtr locator); +XMLPUBFUN xmlChar * + xmlTextReaderLocatorBaseURI (xmlTextReaderLocatorPtr locator); +XMLPUBFUN void + xmlTextReaderSetErrorHandler(xmlTextReaderPtr reader, + xmlTextReaderErrorFunc f, + void *arg); +XMLPUBFUN void + xmlTextReaderSetStructuredErrorHandler(xmlTextReaderPtr reader, + xmlStructuredErrorFunc f, + void *arg); +XMLPUBFUN void + xmlTextReaderGetErrorHandler(xmlTextReaderPtr reader, + xmlTextReaderErrorFunc *f, + void **arg); + +#endif /* LIBXML_READER_ENABLED */ + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XMLREADER_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlregexp.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlregexp.h new file mode 100644 index 00000000..2d66437a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlregexp.h @@ -0,0 +1,215 @@ +/* + * Summary: regular expressions handling + * Description: basic API for libxml regular expressions handling used + * for XML Schemas and validation. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_REGEXP_H__ +#define __XML_REGEXP_H__ + +#include <stdio.h> +#include <libxml/xmlversion.h> +#include <libxml/xmlstring.h> + +#ifdef LIBXML_REGEXP_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlRegexpPtr: + * + * A libxml regular expression, they can actually be far more complex + * thank the POSIX regex expressions. + */ +typedef struct _xmlRegexp xmlRegexp; +typedef xmlRegexp *xmlRegexpPtr; + +/** + * xmlRegExecCtxtPtr: + * + * A libxml progressive regular expression evaluation context + */ +typedef struct _xmlRegExecCtxt xmlRegExecCtxt; +typedef xmlRegExecCtxt *xmlRegExecCtxtPtr; + +/* + * The POSIX like API + */ +XMLPUBFUN xmlRegexpPtr + xmlRegexpCompile (const xmlChar *regexp); +XMLPUBFUN void xmlRegFreeRegexp(xmlRegexpPtr regexp); +XMLPUBFUN int + xmlRegexpExec (xmlRegexpPtr comp, + const xmlChar *value); +XMLPUBFUN void + xmlRegexpPrint (FILE *output, + xmlRegexpPtr regexp); +XMLPUBFUN int + xmlRegexpIsDeterminist(xmlRegexpPtr comp); + +/** + * xmlRegExecCallbacks: + * @exec: the regular expression context + * @token: the current token string + * @transdata: transition data + * @inputdata: input data + * + * Callback function when doing a transition in the automata + */ +typedef void (*xmlRegExecCallbacks) (xmlRegExecCtxtPtr exec, + const xmlChar *token, + void *transdata, + void *inputdata); + +/* + * The progressive API + */ +XMLPUBFUN xmlRegExecCtxtPtr + xmlRegNewExecCtxt (xmlRegexpPtr comp, + xmlRegExecCallbacks callback, + void *data); +XMLPUBFUN void + xmlRegFreeExecCtxt (xmlRegExecCtxtPtr exec); +XMLPUBFUN int + xmlRegExecPushString(xmlRegExecCtxtPtr exec, + const xmlChar *value, + void *data); +XMLPUBFUN int + xmlRegExecPushString2(xmlRegExecCtxtPtr exec, + const xmlChar *value, + const xmlChar *value2, + void *data); + +XMLPUBFUN int + xmlRegExecNextValues(xmlRegExecCtxtPtr exec, + int *nbval, + int *nbneg, + xmlChar **values, + int *terminal); +XMLPUBFUN int + xmlRegExecErrInfo (xmlRegExecCtxtPtr exec, + const xmlChar **string, + int *nbval, + int *nbneg, + xmlChar **values, + int *terminal); +#ifdef LIBXML_EXPR_ENABLED +/* + * Formal regular expression handling + * Its goal is to do some formal work on content models + */ + +/* expressions are used within a context */ +typedef struct _xmlExpCtxt xmlExpCtxt; +typedef xmlExpCtxt *xmlExpCtxtPtr; + +XMLPUBFUN void + xmlExpFreeCtxt (xmlExpCtxtPtr ctxt); +XMLPUBFUN xmlExpCtxtPtr + xmlExpNewCtxt (int maxNodes, + xmlDictPtr dict); + +XMLPUBFUN int + xmlExpCtxtNbNodes(xmlExpCtxtPtr ctxt); +XMLPUBFUN int + xmlExpCtxtNbCons(xmlExpCtxtPtr ctxt); + +/* Expressions are trees but the tree is opaque */ +typedef struct _xmlExpNode xmlExpNode; +typedef xmlExpNode *xmlExpNodePtr; + +typedef enum { + XML_EXP_EMPTY = 0, + XML_EXP_FORBID = 1, + XML_EXP_ATOM = 2, + XML_EXP_SEQ = 3, + XML_EXP_OR = 4, + XML_EXP_COUNT = 5 +} xmlExpNodeType; + +/* + * 2 core expressions shared by all for the empty language set + * and for the set with just the empty token + */ +XMLPUBVAR xmlExpNodePtr forbiddenExp; +XMLPUBVAR xmlExpNodePtr emptyExp; + +/* + * Expressions are reference counted internally + */ +XMLPUBFUN void + xmlExpFree (xmlExpCtxtPtr ctxt, + xmlExpNodePtr expr); +XMLPUBFUN void + xmlExpRef (xmlExpNodePtr expr); + +/* + * constructors can be either manual or from a string + */ +XMLPUBFUN xmlExpNodePtr + xmlExpParse (xmlExpCtxtPtr ctxt, + const char *expr); +XMLPUBFUN xmlExpNodePtr + xmlExpNewAtom (xmlExpCtxtPtr ctxt, + const xmlChar *name, + int len); +XMLPUBFUN xmlExpNodePtr + xmlExpNewOr (xmlExpCtxtPtr ctxt, + xmlExpNodePtr left, + xmlExpNodePtr right); +XMLPUBFUN xmlExpNodePtr + xmlExpNewSeq (xmlExpCtxtPtr ctxt, + xmlExpNodePtr left, + xmlExpNodePtr right); +XMLPUBFUN xmlExpNodePtr + xmlExpNewRange (xmlExpCtxtPtr ctxt, + xmlExpNodePtr subset, + int min, + int max); +/* + * The really interesting APIs + */ +XMLPUBFUN int + xmlExpIsNillable(xmlExpNodePtr expr); +XMLPUBFUN int + xmlExpMaxToken (xmlExpNodePtr expr); +XMLPUBFUN int + xmlExpGetLanguage(xmlExpCtxtPtr ctxt, + xmlExpNodePtr expr, + const xmlChar**langList, + int len); +XMLPUBFUN int + xmlExpGetStart (xmlExpCtxtPtr ctxt, + xmlExpNodePtr expr, + const xmlChar**tokList, + int len); +XMLPUBFUN xmlExpNodePtr + xmlExpStringDerive(xmlExpCtxtPtr ctxt, + xmlExpNodePtr expr, + const xmlChar *str, + int len); +XMLPUBFUN xmlExpNodePtr + xmlExpExpDerive (xmlExpCtxtPtr ctxt, + xmlExpNodePtr expr, + xmlExpNodePtr sub); +XMLPUBFUN int + xmlExpSubsume (xmlExpCtxtPtr ctxt, + xmlExpNodePtr expr, + xmlExpNodePtr sub); +XMLPUBFUN void + xmlExpDump (xmlBufferPtr buf, + xmlExpNodePtr expr); +#endif /* LIBXML_EXPR_ENABLED */ +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_REGEXP_ENABLED */ + +#endif /*__XML_REGEXP_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlsave.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlsave.h new file mode 100644 index 00000000..fbf293a2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlsave.h @@ -0,0 +1,97 @@ +/* + * Summary: the XML document serializer + * Description: API to save document or subtree of document + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XMLSAVE_H__ +#define __XML_XMLSAVE_H__ + +#include <libxml/xmlversion.h> +#include <libxml/tree.h> +#include <libxml/encoding.h> +#include <libxml/xmlIO.h> + +#ifdef LIBXML_OUTPUT_ENABLED +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlSaveOption: + * + * This is the set of XML save options that can be passed down + * to the xmlSaveToFd() and similar calls. + */ +typedef enum { + XML_SAVE_FORMAT = 1<<0, /* format save output */ + XML_SAVE_NO_DECL = 1<<1, /* drop the xml declaration */ + XML_SAVE_NO_EMPTY = 1<<2, /* no empty tags */ + XML_SAVE_NO_XHTML = 1<<3, /* disable XHTML1 specific rules */ + XML_SAVE_XHTML = 1<<4, /* force XHTML1 specific rules */ + XML_SAVE_AS_XML = 1<<5, /* force XML serialization on HTML doc */ + XML_SAVE_AS_HTML = 1<<6, /* force HTML serialization on XML doc */ + XML_SAVE_WSNONSIG = 1<<7 /* format with non-significant whitespace */ +} xmlSaveOption; + + +typedef struct _xmlSaveCtxt xmlSaveCtxt; +typedef xmlSaveCtxt *xmlSaveCtxtPtr; + +XMLPUBFUN xmlSaveCtxtPtr + xmlSaveToFd (int fd, + const char *encoding, + int options); +XMLPUBFUN xmlSaveCtxtPtr + xmlSaveToFilename (const char *filename, + const char *encoding, + int options); + +XMLPUBFUN xmlSaveCtxtPtr + xmlSaveToBuffer (xmlBufferPtr buffer, + const char *encoding, + int options); + +XMLPUBFUN xmlSaveCtxtPtr + xmlSaveToIO (xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, + void *ioctx, + const char *encoding, + int options); + +XMLPUBFUN long + xmlSaveDoc (xmlSaveCtxtPtr ctxt, + xmlDocPtr doc); +XMLPUBFUN long + xmlSaveTree (xmlSaveCtxtPtr ctxt, + xmlNodePtr node); + +XMLPUBFUN int + xmlSaveFlush (xmlSaveCtxtPtr ctxt); +XMLPUBFUN int + xmlSaveClose (xmlSaveCtxtPtr ctxt); +XMLPUBFUN int + xmlSaveSetEscape (xmlSaveCtxtPtr ctxt, + xmlCharEncodingOutputFunc escape); +XMLPUBFUN int + xmlSaveSetAttrEscape (xmlSaveCtxtPtr ctxt, + xmlCharEncodingOutputFunc escape); + +XMLPUBFUN int + xmlThrDefIndentTreeOutput(int v); +XMLPUBFUN const char * + xmlThrDefTreeIndentString(const char * v); +XMLPUBFUN int + xmlThrDefSaveNoEmptyTags(int v); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_OUTPUT_ENABLED */ +#endif /* __XML_XMLSAVE_H__ */ + + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlschemas.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlschemas.h new file mode 100644 index 00000000..c2af3d70 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlschemas.h @@ -0,0 +1,249 @@ +/* + * Summary: incomplete XML Schemas structure implementation + * Description: interface to the XML Schemas handling and schema validity + * checking, it is incomplete right now. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __XML_SCHEMA_H__ +#define __XML_SCHEMA_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <stdio.h> +#include <libxml/encoding.h> +#include <libxml/tree.h> +#include <libxml/xmlerror.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * This error codes are obsolete; not used any more. + */ +typedef enum { + XML_SCHEMAS_ERR_OK = 0, + XML_SCHEMAS_ERR_NOROOT = 1, + XML_SCHEMAS_ERR_UNDECLAREDELEM, + XML_SCHEMAS_ERR_NOTTOPLEVEL, + XML_SCHEMAS_ERR_MISSING, + XML_SCHEMAS_ERR_WRONGELEM, + XML_SCHEMAS_ERR_NOTYPE, + XML_SCHEMAS_ERR_NOROLLBACK, + XML_SCHEMAS_ERR_ISABSTRACT, + XML_SCHEMAS_ERR_NOTEMPTY, + XML_SCHEMAS_ERR_ELEMCONT, + XML_SCHEMAS_ERR_HAVEDEFAULT, + XML_SCHEMAS_ERR_NOTNILLABLE, + XML_SCHEMAS_ERR_EXTRACONTENT, + XML_SCHEMAS_ERR_INVALIDATTR, + XML_SCHEMAS_ERR_INVALIDELEM, + XML_SCHEMAS_ERR_NOTDETERMINIST, + XML_SCHEMAS_ERR_CONSTRUCT, + XML_SCHEMAS_ERR_INTERNAL, + XML_SCHEMAS_ERR_NOTSIMPLE, + XML_SCHEMAS_ERR_ATTRUNKNOWN, + XML_SCHEMAS_ERR_ATTRINVALID, + XML_SCHEMAS_ERR_VALUE, + XML_SCHEMAS_ERR_FACET, + XML_SCHEMAS_ERR_, + XML_SCHEMAS_ERR_XXX +} xmlSchemaValidError; + +/* +* ATTENTION: Change xmlSchemaSetValidOptions's check +* for invalid values, if adding to the validation +* options below. +*/ +/** + * xmlSchemaValidOption: + * + * This is the set of XML Schema validation options. + */ +typedef enum { + XML_SCHEMA_VAL_VC_I_CREATE = 1<<0 + /* Default/fixed: create an attribute node + * or an element's text node on the instance. + */ +} xmlSchemaValidOption; + +/* + XML_SCHEMA_VAL_XSI_ASSEMBLE = 1<<1, + * assemble schemata using + * xsi:schemaLocation and + * xsi:noNamespaceSchemaLocation +*/ + +/** + * The schemas related types are kept internal + */ +typedef struct _xmlSchema xmlSchema; +typedef xmlSchema *xmlSchemaPtr; + +/** + * xmlSchemaValidityErrorFunc: + * @ctx: the validation context + * @msg: the message + * @...: extra arguments + * + * Signature of an error callback from an XSD validation + */ +typedef void (*xmlSchemaValidityErrorFunc) + (void *ctx, const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); + +/** + * xmlSchemaValidityWarningFunc: + * @ctx: the validation context + * @msg: the message + * @...: extra arguments + * + * Signature of a warning callback from an XSD validation + */ +typedef void (*xmlSchemaValidityWarningFunc) + (void *ctx, const char *msg, ...) LIBXML_ATTR_FORMAT(2,3); + +/** + * A schemas validation context + */ +typedef struct _xmlSchemaParserCtxt xmlSchemaParserCtxt; +typedef xmlSchemaParserCtxt *xmlSchemaParserCtxtPtr; + +typedef struct _xmlSchemaValidCtxt xmlSchemaValidCtxt; +typedef xmlSchemaValidCtxt *xmlSchemaValidCtxtPtr; + +/** + * xmlSchemaValidityLocatorFunc: + * @ctx: user provided context + * @file: returned file information + * @line: returned line information + * + * A schemas validation locator, a callback called by the validator. + * This is used when file or node information are not available + * to find out what file and line number are affected + * + * Returns: 0 in case of success and -1 in case of error + */ + +typedef int (*xmlSchemaValidityLocatorFunc) (void *ctx, + const char **file, unsigned long *line); + +/* + * Interfaces for parsing. + */ +XMLPUBFUN xmlSchemaParserCtxtPtr + xmlSchemaNewParserCtxt (const char *URL); +XMLPUBFUN xmlSchemaParserCtxtPtr + xmlSchemaNewMemParserCtxt (const char *buffer, + int size); +XMLPUBFUN xmlSchemaParserCtxtPtr + xmlSchemaNewDocParserCtxt (xmlDocPtr doc); +XMLPUBFUN void + xmlSchemaFreeParserCtxt (xmlSchemaParserCtxtPtr ctxt); +XMLPUBFUN void + xmlSchemaSetParserErrors (xmlSchemaParserCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, + void *ctx); +XMLPUBFUN void + xmlSchemaSetParserStructuredErrors(xmlSchemaParserCtxtPtr ctxt, + xmlStructuredErrorFunc serror, + void *ctx); +XMLPUBFUN int + xmlSchemaGetParserErrors(xmlSchemaParserCtxtPtr ctxt, + xmlSchemaValidityErrorFunc * err, + xmlSchemaValidityWarningFunc * warn, + void **ctx); +XMLPUBFUN int + xmlSchemaIsValid (xmlSchemaValidCtxtPtr ctxt); + +XMLPUBFUN xmlSchemaPtr + xmlSchemaParse (xmlSchemaParserCtxtPtr ctxt); +XMLPUBFUN void + xmlSchemaFree (xmlSchemaPtr schema); +#ifdef LIBXML_OUTPUT_ENABLED +XMLPUBFUN void + xmlSchemaDump (FILE *output, + xmlSchemaPtr schema); +#endif /* LIBXML_OUTPUT_ENABLED */ +/* + * Interfaces for validating + */ +XMLPUBFUN void + xmlSchemaSetValidErrors (xmlSchemaValidCtxtPtr ctxt, + xmlSchemaValidityErrorFunc err, + xmlSchemaValidityWarningFunc warn, + void *ctx); +XMLPUBFUN void + xmlSchemaSetValidStructuredErrors(xmlSchemaValidCtxtPtr ctxt, + xmlStructuredErrorFunc serror, + void *ctx); +XMLPUBFUN int + xmlSchemaGetValidErrors (xmlSchemaValidCtxtPtr ctxt, + xmlSchemaValidityErrorFunc *err, + xmlSchemaValidityWarningFunc *warn, + void **ctx); +XMLPUBFUN int + xmlSchemaSetValidOptions (xmlSchemaValidCtxtPtr ctxt, + int options); +XMLPUBFUN void + xmlSchemaValidateSetFilename(xmlSchemaValidCtxtPtr vctxt, + const char *filename); +XMLPUBFUN int + xmlSchemaValidCtxtGetOptions(xmlSchemaValidCtxtPtr ctxt); + +XMLPUBFUN xmlSchemaValidCtxtPtr + xmlSchemaNewValidCtxt (xmlSchemaPtr schema); +XMLPUBFUN void + xmlSchemaFreeValidCtxt (xmlSchemaValidCtxtPtr ctxt); +XMLPUBFUN int + xmlSchemaValidateDoc (xmlSchemaValidCtxtPtr ctxt, + xmlDocPtr instance); +XMLPUBFUN int + xmlSchemaValidateOneElement (xmlSchemaValidCtxtPtr ctxt, + xmlNodePtr elem); +XMLPUBFUN int + xmlSchemaValidateStream (xmlSchemaValidCtxtPtr ctxt, + xmlParserInputBufferPtr input, + xmlCharEncoding enc, + xmlSAXHandlerPtr sax, + void *user_data); +XMLPUBFUN int + xmlSchemaValidateFile (xmlSchemaValidCtxtPtr ctxt, + const char * filename, + int options); + +XMLPUBFUN xmlParserCtxtPtr + xmlSchemaValidCtxtGetParserCtxt(xmlSchemaValidCtxtPtr ctxt); + +/* + * Interface to insert Schemas SAX validation in a SAX stream + */ +typedef struct _xmlSchemaSAXPlug xmlSchemaSAXPlugStruct; +typedef xmlSchemaSAXPlugStruct *xmlSchemaSAXPlugPtr; + +XMLPUBFUN xmlSchemaSAXPlugPtr + xmlSchemaSAXPlug (xmlSchemaValidCtxtPtr ctxt, + xmlSAXHandlerPtr *sax, + void **user_data); +XMLPUBFUN int + xmlSchemaSAXUnplug (xmlSchemaSAXPlugPtr plug); + + +XMLPUBFUN void + xmlSchemaValidateSetLocator (xmlSchemaValidCtxtPtr vctxt, + xmlSchemaValidityLocatorFunc f, + void *ctxt); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlschemastypes.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlschemastypes.h new file mode 100644 index 00000000..e2cde357 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlschemastypes.h @@ -0,0 +1,152 @@ +/* + * Summary: implementation of XML Schema Datatypes + * Description: module providing the XML Schema Datatypes implementation + * both definition and validity checking + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + + +#ifndef __XML_SCHEMA_TYPES_H__ +#define __XML_SCHEMA_TYPES_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_SCHEMAS_ENABLED + +#include <libxml/schemasInternals.h> +#include <libxml/xmlschemas.h> + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { + XML_SCHEMA_WHITESPACE_UNKNOWN = 0, + XML_SCHEMA_WHITESPACE_PRESERVE = 1, + XML_SCHEMA_WHITESPACE_REPLACE = 2, + XML_SCHEMA_WHITESPACE_COLLAPSE = 3 +} xmlSchemaWhitespaceValueType; + +XMLPUBFUN int + xmlSchemaInitTypes (void); +XML_DEPRECATED +XMLPUBFUN void + xmlSchemaCleanupTypes (void); +XMLPUBFUN xmlSchemaTypePtr + xmlSchemaGetPredefinedType (const xmlChar *name, + const xmlChar *ns); +XMLPUBFUN int + xmlSchemaValidatePredefinedType (xmlSchemaTypePtr type, + const xmlChar *value, + xmlSchemaValPtr *val); +XMLPUBFUN int + xmlSchemaValPredefTypeNode (xmlSchemaTypePtr type, + const xmlChar *value, + xmlSchemaValPtr *val, + xmlNodePtr node); +XMLPUBFUN int + xmlSchemaValidateFacet (xmlSchemaTypePtr base, + xmlSchemaFacetPtr facet, + const xmlChar *value, + xmlSchemaValPtr val); +XMLPUBFUN int + xmlSchemaValidateFacetWhtsp (xmlSchemaFacetPtr facet, + xmlSchemaWhitespaceValueType fws, + xmlSchemaValType valType, + const xmlChar *value, + xmlSchemaValPtr val, + xmlSchemaWhitespaceValueType ws); +XMLPUBFUN void + xmlSchemaFreeValue (xmlSchemaValPtr val); +XMLPUBFUN xmlSchemaFacetPtr + xmlSchemaNewFacet (void); +XMLPUBFUN int + xmlSchemaCheckFacet (xmlSchemaFacetPtr facet, + xmlSchemaTypePtr typeDecl, + xmlSchemaParserCtxtPtr ctxt, + const xmlChar *name); +XMLPUBFUN void + xmlSchemaFreeFacet (xmlSchemaFacetPtr facet); +XMLPUBFUN int + xmlSchemaCompareValues (xmlSchemaValPtr x, + xmlSchemaValPtr y); +XMLPUBFUN xmlSchemaTypePtr + xmlSchemaGetBuiltInListSimpleTypeItemType (xmlSchemaTypePtr type); +XMLPUBFUN int + xmlSchemaValidateListSimpleTypeFacet (xmlSchemaFacetPtr facet, + const xmlChar *value, + unsigned long actualLen, + unsigned long *expectedLen); +XMLPUBFUN xmlSchemaTypePtr + xmlSchemaGetBuiltInType (xmlSchemaValType type); +XMLPUBFUN int + xmlSchemaIsBuiltInTypeFacet (xmlSchemaTypePtr type, + int facetType); +XMLPUBFUN xmlChar * + xmlSchemaCollapseString (const xmlChar *value); +XMLPUBFUN xmlChar * + xmlSchemaWhiteSpaceReplace (const xmlChar *value); +XMLPUBFUN unsigned long + xmlSchemaGetFacetValueAsULong (xmlSchemaFacetPtr facet); +XMLPUBFUN int + xmlSchemaValidateLengthFacet (xmlSchemaTypePtr type, + xmlSchemaFacetPtr facet, + const xmlChar *value, + xmlSchemaValPtr val, + unsigned long *length); +XMLPUBFUN int + xmlSchemaValidateLengthFacetWhtsp(xmlSchemaFacetPtr facet, + xmlSchemaValType valType, + const xmlChar *value, + xmlSchemaValPtr val, + unsigned long *length, + xmlSchemaWhitespaceValueType ws); +XMLPUBFUN int + xmlSchemaValPredefTypeNodeNoNorm(xmlSchemaTypePtr type, + const xmlChar *value, + xmlSchemaValPtr *val, + xmlNodePtr node); +XMLPUBFUN int + xmlSchemaGetCanonValue (xmlSchemaValPtr val, + const xmlChar **retValue); +XMLPUBFUN int + xmlSchemaGetCanonValueWhtsp (xmlSchemaValPtr val, + const xmlChar **retValue, + xmlSchemaWhitespaceValueType ws); +XMLPUBFUN int + xmlSchemaValueAppend (xmlSchemaValPtr prev, + xmlSchemaValPtr cur); +XMLPUBFUN xmlSchemaValPtr + xmlSchemaValueGetNext (xmlSchemaValPtr cur); +XMLPUBFUN const xmlChar * + xmlSchemaValueGetAsString (xmlSchemaValPtr val); +XMLPUBFUN int + xmlSchemaValueGetAsBoolean (xmlSchemaValPtr val); +XMLPUBFUN xmlSchemaValPtr + xmlSchemaNewStringValue (xmlSchemaValType type, + const xmlChar *value); +XMLPUBFUN xmlSchemaValPtr + xmlSchemaNewNOTATIONValue (const xmlChar *name, + const xmlChar *ns); +XMLPUBFUN xmlSchemaValPtr + xmlSchemaNewQNameValue (const xmlChar *namespaceName, + const xmlChar *localName); +XMLPUBFUN int + xmlSchemaCompareValuesWhtsp (xmlSchemaValPtr x, + xmlSchemaWhitespaceValueType xws, + xmlSchemaValPtr y, + xmlSchemaWhitespaceValueType yws); +XMLPUBFUN xmlSchemaValPtr + xmlSchemaCopyValue (xmlSchemaValPtr val); +XMLPUBFUN xmlSchemaValType + xmlSchemaGetValType (xmlSchemaValPtr val); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_SCHEMAS_ENABLED */ +#endif /* __XML_SCHEMA_TYPES_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlstring.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlstring.h new file mode 100644 index 00000000..db11a0b0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlstring.h @@ -0,0 +1,140 @@ +/* + * Summary: set of routines to process strings + * Description: type and interfaces needed for the internal string handling + * of the library, especially UTF8 processing. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_STRING_H__ +#define __XML_STRING_H__ + +#include <stdarg.h> +#include <libxml/xmlversion.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xmlChar: + * + * This is a basic byte in an UTF-8 encoded string. + * It's unsigned allowing to pinpoint case where char * are assigned + * to xmlChar * (possibly making serialization back impossible). + */ +typedef unsigned char xmlChar; + +/** + * BAD_CAST: + * + * Macro to cast a string to an xmlChar * when one know its safe. + */ +#define BAD_CAST (xmlChar *) + +/* + * xmlChar handling + */ +XMLPUBFUN xmlChar * + xmlStrdup (const xmlChar *cur); +XMLPUBFUN xmlChar * + xmlStrndup (const xmlChar *cur, + int len); +XMLPUBFUN xmlChar * + xmlCharStrndup (const char *cur, + int len); +XMLPUBFUN xmlChar * + xmlCharStrdup (const char *cur); +XMLPUBFUN xmlChar * + xmlStrsub (const xmlChar *str, + int start, + int len); +XMLPUBFUN const xmlChar * + xmlStrchr (const xmlChar *str, + xmlChar val); +XMLPUBFUN const xmlChar * + xmlStrstr (const xmlChar *str, + const xmlChar *val); +XMLPUBFUN const xmlChar * + xmlStrcasestr (const xmlChar *str, + const xmlChar *val); +XMLPUBFUN int + xmlStrcmp (const xmlChar *str1, + const xmlChar *str2); +XMLPUBFUN int + xmlStrncmp (const xmlChar *str1, + const xmlChar *str2, + int len); +XMLPUBFUN int + xmlStrcasecmp (const xmlChar *str1, + const xmlChar *str2); +XMLPUBFUN int + xmlStrncasecmp (const xmlChar *str1, + const xmlChar *str2, + int len); +XMLPUBFUN int + xmlStrEqual (const xmlChar *str1, + const xmlChar *str2); +XMLPUBFUN int + xmlStrQEqual (const xmlChar *pref, + const xmlChar *name, + const xmlChar *str); +XMLPUBFUN int + xmlStrlen (const xmlChar *str); +XMLPUBFUN xmlChar * + xmlStrcat (xmlChar *cur, + const xmlChar *add); +XMLPUBFUN xmlChar * + xmlStrncat (xmlChar *cur, + const xmlChar *add, + int len); +XMLPUBFUN xmlChar * + xmlStrncatNew (const xmlChar *str1, + const xmlChar *str2, + int len); +XMLPUBFUN int + xmlStrPrintf (xmlChar *buf, + int len, + const char *msg, + ...) LIBXML_ATTR_FORMAT(3,4); +XMLPUBFUN int + xmlStrVPrintf (xmlChar *buf, + int len, + const char *msg, + va_list ap) LIBXML_ATTR_FORMAT(3,0); + +XMLPUBFUN int + xmlGetUTF8Char (const unsigned char *utf, + int *len); +XMLPUBFUN int + xmlCheckUTF8 (const unsigned char *utf); +XMLPUBFUN int + xmlUTF8Strsize (const xmlChar *utf, + int len); +XMLPUBFUN xmlChar * + xmlUTF8Strndup (const xmlChar *utf, + int len); +XMLPUBFUN const xmlChar * + xmlUTF8Strpos (const xmlChar *utf, + int pos); +XMLPUBFUN int + xmlUTF8Strloc (const xmlChar *utf, + const xmlChar *utfchar); +XMLPUBFUN xmlChar * + xmlUTF8Strsub (const xmlChar *utf, + int start, + int len); +XMLPUBFUN int + xmlUTF8Strlen (const xmlChar *utf); +XMLPUBFUN int + xmlUTF8Size (const xmlChar *utf); +XMLPUBFUN int + xmlUTF8Charcmp (const xmlChar *utf1, + const xmlChar *utf2); + +#ifdef __cplusplus +} +#endif +#endif /* __XML_STRING_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlunicode.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlunicode.h new file mode 100644 index 00000000..2e50a49f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlunicode.h @@ -0,0 +1,202 @@ +/* + * Summary: Unicode character APIs + * Description: API for the Unicode character APIs + * + * This file is automatically generated from the + * UCS description files of the Unicode Character Database + * http://www.unicode.org/Public/4.0-Update1/UCD-4.0.1.html + * using the genUnicode.py Python script. + * + * Generation date: Mon Mar 27 11:09:52 2006 + * Sources: Blocks-4.0.1.txt UnicodeData-4.0.1.txt + * Author: Daniel Veillard + */ + +#ifndef __XML_UNICODE_H__ +#define __XML_UNICODE_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_UNICODE_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +XMLPUBFUN int xmlUCSIsAegeanNumbers (int code); +XMLPUBFUN int xmlUCSIsAlphabeticPresentationForms (int code); +XMLPUBFUN int xmlUCSIsArabic (int code); +XMLPUBFUN int xmlUCSIsArabicPresentationFormsA (int code); +XMLPUBFUN int xmlUCSIsArabicPresentationFormsB (int code); +XMLPUBFUN int xmlUCSIsArmenian (int code); +XMLPUBFUN int xmlUCSIsArrows (int code); +XMLPUBFUN int xmlUCSIsBasicLatin (int code); +XMLPUBFUN int xmlUCSIsBengali (int code); +XMLPUBFUN int xmlUCSIsBlockElements (int code); +XMLPUBFUN int xmlUCSIsBopomofo (int code); +XMLPUBFUN int xmlUCSIsBopomofoExtended (int code); +XMLPUBFUN int xmlUCSIsBoxDrawing (int code); +XMLPUBFUN int xmlUCSIsBraillePatterns (int code); +XMLPUBFUN int xmlUCSIsBuhid (int code); +XMLPUBFUN int xmlUCSIsByzantineMusicalSymbols (int code); +XMLPUBFUN int xmlUCSIsCJKCompatibility (int code); +XMLPUBFUN int xmlUCSIsCJKCompatibilityForms (int code); +XMLPUBFUN int xmlUCSIsCJKCompatibilityIdeographs (int code); +XMLPUBFUN int xmlUCSIsCJKCompatibilityIdeographsSupplement (int code); +XMLPUBFUN int xmlUCSIsCJKRadicalsSupplement (int code); +XMLPUBFUN int xmlUCSIsCJKSymbolsandPunctuation (int code); +XMLPUBFUN int xmlUCSIsCJKUnifiedIdeographs (int code); +XMLPUBFUN int xmlUCSIsCJKUnifiedIdeographsExtensionA (int code); +XMLPUBFUN int xmlUCSIsCJKUnifiedIdeographsExtensionB (int code); +XMLPUBFUN int xmlUCSIsCherokee (int code); +XMLPUBFUN int xmlUCSIsCombiningDiacriticalMarks (int code); +XMLPUBFUN int xmlUCSIsCombiningDiacriticalMarksforSymbols (int code); +XMLPUBFUN int xmlUCSIsCombiningHalfMarks (int code); +XMLPUBFUN int xmlUCSIsCombiningMarksforSymbols (int code); +XMLPUBFUN int xmlUCSIsControlPictures (int code); +XMLPUBFUN int xmlUCSIsCurrencySymbols (int code); +XMLPUBFUN int xmlUCSIsCypriotSyllabary (int code); +XMLPUBFUN int xmlUCSIsCyrillic (int code); +XMLPUBFUN int xmlUCSIsCyrillicSupplement (int code); +XMLPUBFUN int xmlUCSIsDeseret (int code); +XMLPUBFUN int xmlUCSIsDevanagari (int code); +XMLPUBFUN int xmlUCSIsDingbats (int code); +XMLPUBFUN int xmlUCSIsEnclosedAlphanumerics (int code); +XMLPUBFUN int xmlUCSIsEnclosedCJKLettersandMonths (int code); +XMLPUBFUN int xmlUCSIsEthiopic (int code); +XMLPUBFUN int xmlUCSIsGeneralPunctuation (int code); +XMLPUBFUN int xmlUCSIsGeometricShapes (int code); +XMLPUBFUN int xmlUCSIsGeorgian (int code); +XMLPUBFUN int xmlUCSIsGothic (int code); +XMLPUBFUN int xmlUCSIsGreek (int code); +XMLPUBFUN int xmlUCSIsGreekExtended (int code); +XMLPUBFUN int xmlUCSIsGreekandCoptic (int code); +XMLPUBFUN int xmlUCSIsGujarati (int code); +XMLPUBFUN int xmlUCSIsGurmukhi (int code); +XMLPUBFUN int xmlUCSIsHalfwidthandFullwidthForms (int code); +XMLPUBFUN int xmlUCSIsHangulCompatibilityJamo (int code); +XMLPUBFUN int xmlUCSIsHangulJamo (int code); +XMLPUBFUN int xmlUCSIsHangulSyllables (int code); +XMLPUBFUN int xmlUCSIsHanunoo (int code); +XMLPUBFUN int xmlUCSIsHebrew (int code); +XMLPUBFUN int xmlUCSIsHighPrivateUseSurrogates (int code); +XMLPUBFUN int xmlUCSIsHighSurrogates (int code); +XMLPUBFUN int xmlUCSIsHiragana (int code); +XMLPUBFUN int xmlUCSIsIPAExtensions (int code); +XMLPUBFUN int xmlUCSIsIdeographicDescriptionCharacters (int code); +XMLPUBFUN int xmlUCSIsKanbun (int code); +XMLPUBFUN int xmlUCSIsKangxiRadicals (int code); +XMLPUBFUN int xmlUCSIsKannada (int code); +XMLPUBFUN int xmlUCSIsKatakana (int code); +XMLPUBFUN int xmlUCSIsKatakanaPhoneticExtensions (int code); +XMLPUBFUN int xmlUCSIsKhmer (int code); +XMLPUBFUN int xmlUCSIsKhmerSymbols (int code); +XMLPUBFUN int xmlUCSIsLao (int code); +XMLPUBFUN int xmlUCSIsLatin1Supplement (int code); +XMLPUBFUN int xmlUCSIsLatinExtendedA (int code); +XMLPUBFUN int xmlUCSIsLatinExtendedB (int code); +XMLPUBFUN int xmlUCSIsLatinExtendedAdditional (int code); +XMLPUBFUN int xmlUCSIsLetterlikeSymbols (int code); +XMLPUBFUN int xmlUCSIsLimbu (int code); +XMLPUBFUN int xmlUCSIsLinearBIdeograms (int code); +XMLPUBFUN int xmlUCSIsLinearBSyllabary (int code); +XMLPUBFUN int xmlUCSIsLowSurrogates (int code); +XMLPUBFUN int xmlUCSIsMalayalam (int code); +XMLPUBFUN int xmlUCSIsMathematicalAlphanumericSymbols (int code); +XMLPUBFUN int xmlUCSIsMathematicalOperators (int code); +XMLPUBFUN int xmlUCSIsMiscellaneousMathematicalSymbolsA (int code); +XMLPUBFUN int xmlUCSIsMiscellaneousMathematicalSymbolsB (int code); +XMLPUBFUN int xmlUCSIsMiscellaneousSymbols (int code); +XMLPUBFUN int xmlUCSIsMiscellaneousSymbolsandArrows (int code); +XMLPUBFUN int xmlUCSIsMiscellaneousTechnical (int code); +XMLPUBFUN int xmlUCSIsMongolian (int code); +XMLPUBFUN int xmlUCSIsMusicalSymbols (int code); +XMLPUBFUN int xmlUCSIsMyanmar (int code); +XMLPUBFUN int xmlUCSIsNumberForms (int code); +XMLPUBFUN int xmlUCSIsOgham (int code); +XMLPUBFUN int xmlUCSIsOldItalic (int code); +XMLPUBFUN int xmlUCSIsOpticalCharacterRecognition (int code); +XMLPUBFUN int xmlUCSIsOriya (int code); +XMLPUBFUN int xmlUCSIsOsmanya (int code); +XMLPUBFUN int xmlUCSIsPhoneticExtensions (int code); +XMLPUBFUN int xmlUCSIsPrivateUse (int code); +XMLPUBFUN int xmlUCSIsPrivateUseArea (int code); +XMLPUBFUN int xmlUCSIsRunic (int code); +XMLPUBFUN int xmlUCSIsShavian (int code); +XMLPUBFUN int xmlUCSIsSinhala (int code); +XMLPUBFUN int xmlUCSIsSmallFormVariants (int code); +XMLPUBFUN int xmlUCSIsSpacingModifierLetters (int code); +XMLPUBFUN int xmlUCSIsSpecials (int code); +XMLPUBFUN int xmlUCSIsSuperscriptsandSubscripts (int code); +XMLPUBFUN int xmlUCSIsSupplementalArrowsA (int code); +XMLPUBFUN int xmlUCSIsSupplementalArrowsB (int code); +XMLPUBFUN int xmlUCSIsSupplementalMathematicalOperators (int code); +XMLPUBFUN int xmlUCSIsSupplementaryPrivateUseAreaA (int code); +XMLPUBFUN int xmlUCSIsSupplementaryPrivateUseAreaB (int code); +XMLPUBFUN int xmlUCSIsSyriac (int code); +XMLPUBFUN int xmlUCSIsTagalog (int code); +XMLPUBFUN int xmlUCSIsTagbanwa (int code); +XMLPUBFUN int xmlUCSIsTags (int code); +XMLPUBFUN int xmlUCSIsTaiLe (int code); +XMLPUBFUN int xmlUCSIsTaiXuanJingSymbols (int code); +XMLPUBFUN int xmlUCSIsTamil (int code); +XMLPUBFUN int xmlUCSIsTelugu (int code); +XMLPUBFUN int xmlUCSIsThaana (int code); +XMLPUBFUN int xmlUCSIsThai (int code); +XMLPUBFUN int xmlUCSIsTibetan (int code); +XMLPUBFUN int xmlUCSIsUgaritic (int code); +XMLPUBFUN int xmlUCSIsUnifiedCanadianAboriginalSyllabics (int code); +XMLPUBFUN int xmlUCSIsVariationSelectors (int code); +XMLPUBFUN int xmlUCSIsVariationSelectorsSupplement (int code); +XMLPUBFUN int xmlUCSIsYiRadicals (int code); +XMLPUBFUN int xmlUCSIsYiSyllables (int code); +XMLPUBFUN int xmlUCSIsYijingHexagramSymbols (int code); + +XMLPUBFUN int xmlUCSIsBlock (int code, const char *block); + +XMLPUBFUN int xmlUCSIsCatC (int code); +XMLPUBFUN int xmlUCSIsCatCc (int code); +XMLPUBFUN int xmlUCSIsCatCf (int code); +XMLPUBFUN int xmlUCSIsCatCo (int code); +XMLPUBFUN int xmlUCSIsCatCs (int code); +XMLPUBFUN int xmlUCSIsCatL (int code); +XMLPUBFUN int xmlUCSIsCatLl (int code); +XMLPUBFUN int xmlUCSIsCatLm (int code); +XMLPUBFUN int xmlUCSIsCatLo (int code); +XMLPUBFUN int xmlUCSIsCatLt (int code); +XMLPUBFUN int xmlUCSIsCatLu (int code); +XMLPUBFUN int xmlUCSIsCatM (int code); +XMLPUBFUN int xmlUCSIsCatMc (int code); +XMLPUBFUN int xmlUCSIsCatMe (int code); +XMLPUBFUN int xmlUCSIsCatMn (int code); +XMLPUBFUN int xmlUCSIsCatN (int code); +XMLPUBFUN int xmlUCSIsCatNd (int code); +XMLPUBFUN int xmlUCSIsCatNl (int code); +XMLPUBFUN int xmlUCSIsCatNo (int code); +XMLPUBFUN int xmlUCSIsCatP (int code); +XMLPUBFUN int xmlUCSIsCatPc (int code); +XMLPUBFUN int xmlUCSIsCatPd (int code); +XMLPUBFUN int xmlUCSIsCatPe (int code); +XMLPUBFUN int xmlUCSIsCatPf (int code); +XMLPUBFUN int xmlUCSIsCatPi (int code); +XMLPUBFUN int xmlUCSIsCatPo (int code); +XMLPUBFUN int xmlUCSIsCatPs (int code); +XMLPUBFUN int xmlUCSIsCatS (int code); +XMLPUBFUN int xmlUCSIsCatSc (int code); +XMLPUBFUN int xmlUCSIsCatSk (int code); +XMLPUBFUN int xmlUCSIsCatSm (int code); +XMLPUBFUN int xmlUCSIsCatSo (int code); +XMLPUBFUN int xmlUCSIsCatZ (int code); +XMLPUBFUN int xmlUCSIsCatZl (int code); +XMLPUBFUN int xmlUCSIsCatZp (int code); +XMLPUBFUN int xmlUCSIsCatZs (int code); + +XMLPUBFUN int xmlUCSIsCat (int code, const char *cat); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_UNICODE_ENABLED */ + +#endif /* __XML_UNICODE_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlversion.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlversion.h new file mode 100644 index 00000000..cec304f6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlversion.h @@ -0,0 +1,511 @@ +/* + * Summary: compile-time version information + * Description: compile-time version information for the XML library + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_VERSION_H__ +#define __XML_VERSION_H__ + +#include <libxml/xmlexports.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * use those to be sure nothing nasty will happen if + * your library and includes mismatch + */ +#ifndef LIBXML2_COMPILING_MSCCDEF +XMLPUBFUN void xmlCheckVersion(int version); +#endif /* LIBXML2_COMPILING_MSCCDEF */ + +/** + * LIBXML_DOTTED_VERSION: + * + * the version string like "1.2.3" + */ +#define LIBXML_DOTTED_VERSION "2.12.9" + +/** + * LIBXML_VERSION: + * + * the version number: 1.2.3 value is 10203 + */ +#define LIBXML_VERSION 21209 + +/** + * LIBXML_VERSION_STRING: + * + * the version number string, 1.2.3 value is "10203" + */ +#define LIBXML_VERSION_STRING "21209" + +/** + * LIBXML_VERSION_EXTRA: + * + * extra version information, used to show a git commit description + */ +#define LIBXML_VERSION_EXTRA "" + +/** + * LIBXML_TEST_VERSION: + * + * Macro to check that the libxml version in use is compatible with + * the version the software has been compiled against + */ +#define LIBXML_TEST_VERSION xmlCheckVersion(21209); + +#ifndef VMS +#if 0 +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO +#else +/** + * WITHOUT_TRIO: + * + * defined if the trio support should not be configured in + */ +#define WITHOUT_TRIO +#endif +#else /* VMS */ +/** + * WITH_TRIO: + * + * defined if the trio support need to be configured in + */ +#define WITH_TRIO 1 +#endif /* VMS */ + +/** + * LIBXML_THREAD_ENABLED: + * + * Whether the thread support is configured in + */ +#if 1 +#define LIBXML_THREAD_ENABLED +#endif + +/** + * LIBXML_THREAD_ALLOC_ENABLED: + * + * Whether the allocation hooks are per-thread + */ +#if 0 +#define LIBXML_THREAD_ALLOC_ENABLED +#endif + +/** + * LIBXML_TREE_ENABLED: + * + * Whether the DOM like tree manipulation API support is configured in + */ +#if 1 +#define LIBXML_TREE_ENABLED +#endif + +/** + * LIBXML_OUTPUT_ENABLED: + * + * Whether the serialization/saving support is configured in + */ +#if 1 +#define LIBXML_OUTPUT_ENABLED +#endif + +/** + * LIBXML_PUSH_ENABLED: + * + * Whether the push parsing interfaces are configured in + */ +#if 1 +#define LIBXML_PUSH_ENABLED +#endif + +/** + * LIBXML_READER_ENABLED: + * + * Whether the xmlReader parsing interface is configured in + */ +#if 1 +#define LIBXML_READER_ENABLED +#endif + +/** + * LIBXML_PATTERN_ENABLED: + * + * Whether the xmlPattern node selection interface is configured in + */ +#if 1 +#define LIBXML_PATTERN_ENABLED +#endif + +/** + * LIBXML_WRITER_ENABLED: + * + * Whether the xmlWriter saving interface is configured in + */ +#if 1 +#define LIBXML_WRITER_ENABLED +#endif + +/** + * LIBXML_SAX1_ENABLED: + * + * Whether the older SAX1 interface is configured in + */ +#if 1 +#define LIBXML_SAX1_ENABLED +#endif + +/** + * LIBXML_FTP_ENABLED: + * + * Whether the FTP support is configured in + */ +#if 0 +#define LIBXML_FTP_ENABLED +#endif + +/** + * LIBXML_HTTP_ENABLED: + * + * Whether the HTTP support is configured in + */ +#if 1 +#define LIBXML_HTTP_ENABLED +#endif + +/** + * LIBXML_VALID_ENABLED: + * + * Whether the DTD validation support is configured in + */ +#if 1 +#define LIBXML_VALID_ENABLED +#endif + +/** + * LIBXML_HTML_ENABLED: + * + * Whether the HTML support is configured in + */ +#if 1 +#define LIBXML_HTML_ENABLED +#endif + +/** + * LIBXML_LEGACY_ENABLED: + * + * Whether the deprecated APIs are compiled in for compatibility + */ +#if 0 +#define LIBXML_LEGACY_ENABLED +#endif + +/** + * LIBXML_C14N_ENABLED: + * + * Whether the Canonicalization support is configured in + */ +#if 1 +#define LIBXML_C14N_ENABLED +#endif + +/** + * LIBXML_CATALOG_ENABLED: + * + * Whether the Catalog support is configured in + */ +#if 1 +#define LIBXML_CATALOG_ENABLED +#endif + +/** + * LIBXML_XPATH_ENABLED: + * + * Whether XPath is configured in + */ +#if 1 +#define LIBXML_XPATH_ENABLED +#endif + +/** + * LIBXML_XPTR_ENABLED: + * + * Whether XPointer is configured in + */ +#if 1 +#define LIBXML_XPTR_ENABLED +#endif + +/** + * LIBXML_XPTR_LOCS_ENABLED: + * + * Whether support for XPointer locations is configured in + */ +#if 0 +#define LIBXML_XPTR_LOCS_ENABLED +#endif + +/** + * LIBXML_XINCLUDE_ENABLED: + * + * Whether XInclude is configured in + */ +#if 1 +#define LIBXML_XINCLUDE_ENABLED +#endif + +/** + * LIBXML_ICONV_ENABLED: + * + * Whether iconv support is available + */ +#if 1 +#define LIBXML_ICONV_ENABLED +#endif + +/** + * LIBXML_ICU_ENABLED: + * + * Whether icu support is available + */ +#if 0 +#define LIBXML_ICU_ENABLED +#endif + +/** + * LIBXML_ISO8859X_ENABLED: + * + * Whether ISO-8859-* support is made available in case iconv is not + */ +#if 1 +#define LIBXML_ISO8859X_ENABLED +#endif + +/** + * LIBXML_DEBUG_ENABLED: + * + * Whether Debugging module is configured in + */ +#if 1 +#define LIBXML_DEBUG_ENABLED +#endif + +/** + * DEBUG_MEMORY_LOCATION: + * + * Whether the memory debugging is configured in + */ +#if 0 +#define DEBUG_MEMORY_LOCATION +#endif + +/** + * LIBXML_DEBUG_RUNTIME: + * + * Removed + */ +#if 0 +#define LIBXML_DEBUG_RUNTIME +#endif + +/** + * LIBXML_UNICODE_ENABLED: + * + * Whether the Unicode related interfaces are compiled in + */ +#if 1 +#define LIBXML_UNICODE_ENABLED +#endif + +/** + * LIBXML_REGEXP_ENABLED: + * + * Whether the regular expressions interfaces are compiled in + */ +#if 1 +#define LIBXML_REGEXP_ENABLED +#endif + +/** + * LIBXML_AUTOMATA_ENABLED: + * + * Whether the automata interfaces are compiled in + */ +#if 1 +#define LIBXML_AUTOMATA_ENABLED +#endif + +/** + * LIBXML_EXPR_ENABLED: + * + * Whether the formal expressions interfaces are compiled in + * + * This code is unused and disabled unconditionally for now. + */ +#if 0 +#define LIBXML_EXPR_ENABLED +#endif + +/** + * LIBXML_SCHEMAS_ENABLED: + * + * Whether the Schemas validation interfaces are compiled in + */ +#if 1 +#define LIBXML_SCHEMAS_ENABLED +#endif + +/** + * LIBXML_SCHEMATRON_ENABLED: + * + * Whether the Schematron validation interfaces are compiled in + */ +#if 1 +#define LIBXML_SCHEMATRON_ENABLED +#endif + +/** + * LIBXML_MODULES_ENABLED: + * + * Whether the module interfaces are compiled in + */ +#if 1 +#define LIBXML_MODULES_ENABLED +/** + * LIBXML_MODULE_EXTENSION: + * + * the string suffix used by dynamic modules (usually shared libraries) + */ +#define LIBXML_MODULE_EXTENSION ".so" +#endif + +/** + * LIBXML_ZLIB_ENABLED: + * + * Whether the Zlib support is compiled in + */ +#if 1 +#define LIBXML_ZLIB_ENABLED +#endif + +/** + * LIBXML_LZMA_ENABLED: + * + * Whether the Lzma support is compiled in + */ +#if 0 +#define LIBXML_LZMA_ENABLED +#endif + +#ifdef __GNUC__ +/** DOC_DISABLE */ + +#ifndef ATTRIBUTE_UNUSED +# if ((__GNUC__ > 2) || ((__GNUC__ == 2) && (__GNUC_MINOR__ >= 7))) +# define ATTRIBUTE_UNUSED __attribute__((unused)) +# else +# define ATTRIBUTE_UNUSED +# endif +#endif + +#ifndef LIBXML_ATTR_ALLOC_SIZE +# if (!defined(__clang__) && ((__GNUC__ > 4) || ((__GNUC__ == 4) && (__GNUC_MINOR__ >= 3)))) +# define LIBXML_ATTR_ALLOC_SIZE(x) __attribute__((alloc_size(x))) +# else +# define LIBXML_ATTR_ALLOC_SIZE(x) +# endif +#else +# define LIBXML_ATTR_ALLOC_SIZE(x) +#endif + +#ifndef LIBXML_ATTR_FORMAT +# if ((__GNUC__ > 3) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 3))) +# define LIBXML_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args))) +# else +# define LIBXML_ATTR_FORMAT(fmt,args) +# endif +#else +# define LIBXML_ATTR_FORMAT(fmt,args) +#endif + +#ifndef XML_DEPRECATED +# if defined (IN_LIBXML) || (__GNUC__ * 100 + __GNUC_MINOR__ < 301) +# define XML_DEPRECATED +/* Available since at least GCC 3.1 */ +# else +# define XML_DEPRECATED __attribute__((deprecated)) +# endif +#endif + +#if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 406) + #if defined(__clang__) || (__GNUC__ * 100 + __GNUC_MINOR__ >= 800) + #define XML_IGNORE_FPTR_CAST_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wpedantic\"") \ + _Pragma("GCC diagnostic ignored \"-Wcast-function-type\"") + #else + #define XML_IGNORE_FPTR_CAST_WARNINGS \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wpedantic\"") + #endif + #define XML_POP_WARNINGS \ + _Pragma("GCC diagnostic pop") +#else + #define XML_IGNORE_FPTR_CAST_WARNINGS + #define XML_POP_WARNINGS +#endif + +#else /* ! __GNUC__ */ +#define ATTRIBUTE_UNUSED +#define LIBXML_ATTR_ALLOC_SIZE(x) +#define LIBXML_ATTR_FORMAT(fmt,args) +#ifndef XML_DEPRECATED +# if defined (IN_LIBXML) || !defined (_MSC_VER) +# define XML_DEPRECATED +/* Available since Visual Studio 2005 */ +# elif defined (_MSC_VER) && (_MSC_VER >= 1400) +# define XML_DEPRECATED __declspec(deprecated) +# endif +#endif +#if defined (_MSC_VER) && (_MSC_VER >= 1400) +# define XML_IGNORE_FPTR_CAST_WARNINGS __pragma(warning(push)) +#else +# define XML_IGNORE_FPTR_CAST_WARNINGS +#endif +#ifndef XML_POP_WARNINGS +# if defined (_MSC_VER) && (_MSC_VER >= 1400) +# define XML_POP_WARNINGS __pragma(warning(pop)) +# else +# define XML_POP_WARNINGS +# endif +#endif +#endif /* __GNUC__ */ + +#define XML_NO_ATTR + +#ifdef LIBXML_THREAD_ENABLED + #define XML_DECLARE_GLOBAL(name, type, attrs) \ + attrs XMLPUBFUN type *__##name(void); + #define XML_GLOBAL_MACRO(name) (*__##name()) +#else + #define XML_DECLARE_GLOBAL(name, type, attrs) \ + attrs XMLPUBVAR type name; +#endif + +#ifdef __cplusplus +} +#endif /* __cplusplus */ +#endif + + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlwriter.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlwriter.h new file mode 100644 index 00000000..339f2511 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xmlwriter.h @@ -0,0 +1,488 @@ +/* + * Summary: text writing API for XML + * Description: text writing API for XML + * + * Copy: See Copyright for the status of this software. + * + * Author: Alfred Mickautsch <alfred@mickautsch.de> + */ + +#ifndef __XML_XMLWRITER_H__ +#define __XML_XMLWRITER_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_WRITER_ENABLED + +#include <stdarg.h> +#include <libxml/xmlIO.h> +#include <libxml/list.h> +#include <libxml/xmlstring.h> + +#ifdef __cplusplus +extern "C" { +#endif + + typedef struct _xmlTextWriter xmlTextWriter; + typedef xmlTextWriter *xmlTextWriterPtr; + +/* + * Constructors & Destructor + */ + XMLPUBFUN xmlTextWriterPtr + xmlNewTextWriter(xmlOutputBufferPtr out); + XMLPUBFUN xmlTextWriterPtr + xmlNewTextWriterFilename(const char *uri, int compression); + XMLPUBFUN xmlTextWriterPtr + xmlNewTextWriterMemory(xmlBufferPtr buf, int compression); + XMLPUBFUN xmlTextWriterPtr + xmlNewTextWriterPushParser(xmlParserCtxtPtr ctxt, int compression); + XMLPUBFUN xmlTextWriterPtr + xmlNewTextWriterDoc(xmlDocPtr * doc, int compression); + XMLPUBFUN xmlTextWriterPtr + xmlNewTextWriterTree(xmlDocPtr doc, xmlNodePtr node, + int compression); + XMLPUBFUN void xmlFreeTextWriter(xmlTextWriterPtr writer); + +/* + * Functions + */ + + +/* + * Document + */ + XMLPUBFUN int + xmlTextWriterStartDocument(xmlTextWriterPtr writer, + const char *version, + const char *encoding, + const char *standalone); + XMLPUBFUN int xmlTextWriterEndDocument(xmlTextWriterPtr + writer); + +/* + * Comments + */ + XMLPUBFUN int xmlTextWriterStartComment(xmlTextWriterPtr + writer); + XMLPUBFUN int xmlTextWriterEndComment(xmlTextWriterPtr writer); + XMLPUBFUN int + xmlTextWriterWriteFormatComment(xmlTextWriterPtr writer, + const char *format, ...) + LIBXML_ATTR_FORMAT(2,3); + XMLPUBFUN int + xmlTextWriterWriteVFormatComment(xmlTextWriterPtr writer, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(2,0); + XMLPUBFUN int xmlTextWriterWriteComment(xmlTextWriterPtr + writer, + const xmlChar * + content); + +/* + * Elements + */ + XMLPUBFUN int + xmlTextWriterStartElement(xmlTextWriterPtr writer, + const xmlChar * name); + XMLPUBFUN int xmlTextWriterStartElementNS(xmlTextWriterPtr + writer, + const xmlChar * + prefix, + const xmlChar * name, + const xmlChar * + namespaceURI); + XMLPUBFUN int xmlTextWriterEndElement(xmlTextWriterPtr writer); + XMLPUBFUN int xmlTextWriterFullEndElement(xmlTextWriterPtr + writer); + +/* + * Elements conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatElement(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, ...) + LIBXML_ATTR_FORMAT(3,4); + XMLPUBFUN int + xmlTextWriterWriteVFormatElement(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(3,0); + XMLPUBFUN int xmlTextWriterWriteElement(xmlTextWriterPtr + writer, + const xmlChar * name, + const xmlChar * + content); + XMLPUBFUN int + xmlTextWriterWriteFormatElementNS(xmlTextWriterPtr writer, + const xmlChar * prefix, + const xmlChar * name, + const xmlChar * namespaceURI, + const char *format, ...) + LIBXML_ATTR_FORMAT(5,6); + XMLPUBFUN int + xmlTextWriterWriteVFormatElementNS(xmlTextWriterPtr writer, + const xmlChar * prefix, + const xmlChar * name, + const xmlChar * namespaceURI, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(5,0); + XMLPUBFUN int xmlTextWriterWriteElementNS(xmlTextWriterPtr + writer, + const xmlChar * + prefix, + const xmlChar * name, + const xmlChar * + namespaceURI, + const xmlChar * + content); + +/* + * Text + */ + XMLPUBFUN int + xmlTextWriterWriteFormatRaw(xmlTextWriterPtr writer, + const char *format, ...) + LIBXML_ATTR_FORMAT(2,3); + XMLPUBFUN int + xmlTextWriterWriteVFormatRaw(xmlTextWriterPtr writer, + const char *format, va_list argptr) + LIBXML_ATTR_FORMAT(2,0); + XMLPUBFUN int + xmlTextWriterWriteRawLen(xmlTextWriterPtr writer, + const xmlChar * content, int len); + XMLPUBFUN int + xmlTextWriterWriteRaw(xmlTextWriterPtr writer, + const xmlChar * content); + XMLPUBFUN int xmlTextWriterWriteFormatString(xmlTextWriterPtr + writer, + const char + *format, ...) + LIBXML_ATTR_FORMAT(2,3); + XMLPUBFUN int xmlTextWriterWriteVFormatString(xmlTextWriterPtr + writer, + const char + *format, + va_list argptr) + LIBXML_ATTR_FORMAT(2,0); + XMLPUBFUN int xmlTextWriterWriteString(xmlTextWriterPtr writer, + const xmlChar * + content); + XMLPUBFUN int xmlTextWriterWriteBase64(xmlTextWriterPtr writer, + const char *data, + int start, int len); + XMLPUBFUN int xmlTextWriterWriteBinHex(xmlTextWriterPtr writer, + const char *data, + int start, int len); + +/* + * Attributes + */ + XMLPUBFUN int + xmlTextWriterStartAttribute(xmlTextWriterPtr writer, + const xmlChar * name); + XMLPUBFUN int xmlTextWriterStartAttributeNS(xmlTextWriterPtr + writer, + const xmlChar * + prefix, + const xmlChar * + name, + const xmlChar * + namespaceURI); + XMLPUBFUN int xmlTextWriterEndAttribute(xmlTextWriterPtr + writer); + +/* + * Attributes conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatAttribute(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, ...) + LIBXML_ATTR_FORMAT(3,4); + XMLPUBFUN int + xmlTextWriterWriteVFormatAttribute(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(3,0); + XMLPUBFUN int xmlTextWriterWriteAttribute(xmlTextWriterPtr + writer, + const xmlChar * name, + const xmlChar * + content); + XMLPUBFUN int + xmlTextWriterWriteFormatAttributeNS(xmlTextWriterPtr writer, + const xmlChar * prefix, + const xmlChar * name, + const xmlChar * namespaceURI, + const char *format, ...) + LIBXML_ATTR_FORMAT(5,6); + XMLPUBFUN int + xmlTextWriterWriteVFormatAttributeNS(xmlTextWriterPtr writer, + const xmlChar * prefix, + const xmlChar * name, + const xmlChar * namespaceURI, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(5,0); + XMLPUBFUN int xmlTextWriterWriteAttributeNS(xmlTextWriterPtr + writer, + const xmlChar * + prefix, + const xmlChar * + name, + const xmlChar * + namespaceURI, + const xmlChar * + content); + +/* + * PI's + */ + XMLPUBFUN int + xmlTextWriterStartPI(xmlTextWriterPtr writer, + const xmlChar * target); + XMLPUBFUN int xmlTextWriterEndPI(xmlTextWriterPtr writer); + +/* + * PI conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatPI(xmlTextWriterPtr writer, + const xmlChar * target, + const char *format, ...) + LIBXML_ATTR_FORMAT(3,4); + XMLPUBFUN int + xmlTextWriterWriteVFormatPI(xmlTextWriterPtr writer, + const xmlChar * target, + const char *format, va_list argptr) + LIBXML_ATTR_FORMAT(3,0); + XMLPUBFUN int + xmlTextWriterWritePI(xmlTextWriterPtr writer, + const xmlChar * target, + const xmlChar * content); + +/** + * xmlTextWriterWriteProcessingInstruction: + * + * This macro maps to xmlTextWriterWritePI + */ +#define xmlTextWriterWriteProcessingInstruction xmlTextWriterWritePI + +/* + * CDATA + */ + XMLPUBFUN int xmlTextWriterStartCDATA(xmlTextWriterPtr writer); + XMLPUBFUN int xmlTextWriterEndCDATA(xmlTextWriterPtr writer); + +/* + * CDATA conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatCDATA(xmlTextWriterPtr writer, + const char *format, ...) + LIBXML_ATTR_FORMAT(2,3); + XMLPUBFUN int + xmlTextWriterWriteVFormatCDATA(xmlTextWriterPtr writer, + const char *format, va_list argptr) + LIBXML_ATTR_FORMAT(2,0); + XMLPUBFUN int + xmlTextWriterWriteCDATA(xmlTextWriterPtr writer, + const xmlChar * content); + +/* + * DTD + */ + XMLPUBFUN int + xmlTextWriterStartDTD(xmlTextWriterPtr writer, + const xmlChar * name, + const xmlChar * pubid, + const xmlChar * sysid); + XMLPUBFUN int xmlTextWriterEndDTD(xmlTextWriterPtr writer); + +/* + * DTD conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatDTD(xmlTextWriterPtr writer, + const xmlChar * name, + const xmlChar * pubid, + const xmlChar * sysid, + const char *format, ...) + LIBXML_ATTR_FORMAT(5,6); + XMLPUBFUN int + xmlTextWriterWriteVFormatDTD(xmlTextWriterPtr writer, + const xmlChar * name, + const xmlChar * pubid, + const xmlChar * sysid, + const char *format, va_list argptr) + LIBXML_ATTR_FORMAT(5,0); + XMLPUBFUN int + xmlTextWriterWriteDTD(xmlTextWriterPtr writer, + const xmlChar * name, + const xmlChar * pubid, + const xmlChar * sysid, + const xmlChar * subset); + +/** + * xmlTextWriterWriteDocType: + * + * this macro maps to xmlTextWriterWriteDTD + */ +#define xmlTextWriterWriteDocType xmlTextWriterWriteDTD + +/* + * DTD element definition + */ + XMLPUBFUN int + xmlTextWriterStartDTDElement(xmlTextWriterPtr writer, + const xmlChar * name); + XMLPUBFUN int xmlTextWriterEndDTDElement(xmlTextWriterPtr + writer); + +/* + * DTD element definition conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatDTDElement(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, ...) + LIBXML_ATTR_FORMAT(3,4); + XMLPUBFUN int + xmlTextWriterWriteVFormatDTDElement(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(3,0); + XMLPUBFUN int xmlTextWriterWriteDTDElement(xmlTextWriterPtr + writer, + const xmlChar * + name, + const xmlChar * + content); + +/* + * DTD attribute list definition + */ + XMLPUBFUN int + xmlTextWriterStartDTDAttlist(xmlTextWriterPtr writer, + const xmlChar * name); + XMLPUBFUN int xmlTextWriterEndDTDAttlist(xmlTextWriterPtr + writer); + +/* + * DTD attribute list definition conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatDTDAttlist(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, ...) + LIBXML_ATTR_FORMAT(3,4); + XMLPUBFUN int + xmlTextWriterWriteVFormatDTDAttlist(xmlTextWriterPtr writer, + const xmlChar * name, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(3,0); + XMLPUBFUN int xmlTextWriterWriteDTDAttlist(xmlTextWriterPtr + writer, + const xmlChar * + name, + const xmlChar * + content); + +/* + * DTD entity definition + */ + XMLPUBFUN int + xmlTextWriterStartDTDEntity(xmlTextWriterPtr writer, + int pe, const xmlChar * name); + XMLPUBFUN int xmlTextWriterEndDTDEntity(xmlTextWriterPtr + writer); + +/* + * DTD entity definition conveniency functions + */ + XMLPUBFUN int + xmlTextWriterWriteFormatDTDInternalEntity(xmlTextWriterPtr writer, + int pe, + const xmlChar * name, + const char *format, ...) + LIBXML_ATTR_FORMAT(4,5); + XMLPUBFUN int + xmlTextWriterWriteVFormatDTDInternalEntity(xmlTextWriterPtr writer, + int pe, + const xmlChar * name, + const char *format, + va_list argptr) + LIBXML_ATTR_FORMAT(4,0); + XMLPUBFUN int + xmlTextWriterWriteDTDInternalEntity(xmlTextWriterPtr writer, + int pe, + const xmlChar * name, + const xmlChar * content); + XMLPUBFUN int + xmlTextWriterWriteDTDExternalEntity(xmlTextWriterPtr writer, + int pe, + const xmlChar * name, + const xmlChar * pubid, + const xmlChar * sysid, + const xmlChar * ndataid); + XMLPUBFUN int + xmlTextWriterWriteDTDExternalEntityContents(xmlTextWriterPtr + writer, + const xmlChar * pubid, + const xmlChar * sysid, + const xmlChar * + ndataid); + XMLPUBFUN int xmlTextWriterWriteDTDEntity(xmlTextWriterPtr + writer, int pe, + const xmlChar * name, + const xmlChar * + pubid, + const xmlChar * + sysid, + const xmlChar * + ndataid, + const xmlChar * + content); + +/* + * DTD notation definition + */ + XMLPUBFUN int + xmlTextWriterWriteDTDNotation(xmlTextWriterPtr writer, + const xmlChar * name, + const xmlChar * pubid, + const xmlChar * sysid); + +/* + * Indentation + */ + XMLPUBFUN int + xmlTextWriterSetIndent(xmlTextWriterPtr writer, int indent); + XMLPUBFUN int + xmlTextWriterSetIndentString(xmlTextWriterPtr writer, + const xmlChar * str); + + XMLPUBFUN int + xmlTextWriterSetQuoteChar(xmlTextWriterPtr writer, xmlChar quotechar); + + +/* + * misc + */ + XMLPUBFUN int xmlTextWriterFlush(xmlTextWriterPtr writer); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_WRITER_ENABLED */ + +#endif /* __XML_XMLWRITER_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpath.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpath.h new file mode 100644 index 00000000..6dae0780 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpath.h @@ -0,0 +1,575 @@ +/* + * Summary: XML Path Language implementation + * Description: API for the XML Path Language implementation + * + * XML Path Language implementation + * XPath is a language for addressing parts of an XML document, + * designed to be used by both XSLT and XPointer + * http://www.w3.org/TR/xpath + * + * Implements + * W3C Recommendation 16 November 1999 + * http://www.w3.org/TR/1999/REC-xpath-19991116 + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XPATH_H__ +#define __XML_XPATH_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_XPATH_ENABLED + +#include <libxml/xmlerror.h> +#include <libxml/tree.h> +#include <libxml/hash.h> +#endif /* LIBXML_XPATH_ENABLED */ + +#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) +#ifdef __cplusplus +extern "C" { +#endif +#endif /* LIBXML_XPATH_ENABLED or LIBXML_SCHEMAS_ENABLED */ + +#ifdef LIBXML_XPATH_ENABLED + +typedef struct _xmlXPathContext xmlXPathContext; +typedef xmlXPathContext *xmlXPathContextPtr; +typedef struct _xmlXPathParserContext xmlXPathParserContext; +typedef xmlXPathParserContext *xmlXPathParserContextPtr; + +/** + * The set of XPath error codes. + */ + +typedef enum { + XPATH_EXPRESSION_OK = 0, + XPATH_NUMBER_ERROR, + XPATH_UNFINISHED_LITERAL_ERROR, + XPATH_START_LITERAL_ERROR, + XPATH_VARIABLE_REF_ERROR, + XPATH_UNDEF_VARIABLE_ERROR, + XPATH_INVALID_PREDICATE_ERROR, + XPATH_EXPR_ERROR, + XPATH_UNCLOSED_ERROR, + XPATH_UNKNOWN_FUNC_ERROR, + XPATH_INVALID_OPERAND, + XPATH_INVALID_TYPE, + XPATH_INVALID_ARITY, + XPATH_INVALID_CTXT_SIZE, + XPATH_INVALID_CTXT_POSITION, + XPATH_MEMORY_ERROR, + XPTR_SYNTAX_ERROR, + XPTR_RESOURCE_ERROR, + XPTR_SUB_RESOURCE_ERROR, + XPATH_UNDEF_PREFIX_ERROR, + XPATH_ENCODING_ERROR, + XPATH_INVALID_CHAR_ERROR, + XPATH_INVALID_CTXT, + XPATH_STACK_ERROR, + XPATH_FORBID_VARIABLE_ERROR, + XPATH_OP_LIMIT_EXCEEDED, + XPATH_RECURSION_LIMIT_EXCEEDED +} xmlXPathError; + +/* + * A node-set (an unordered collection of nodes without duplicates). + */ +typedef struct _xmlNodeSet xmlNodeSet; +typedef xmlNodeSet *xmlNodeSetPtr; +struct _xmlNodeSet { + int nodeNr; /* number of nodes in the set */ + int nodeMax; /* size of the array as allocated */ + xmlNodePtr *nodeTab; /* array of nodes in no particular order */ + /* @@ with_ns to check whether namespace nodes should be looked at @@ */ +}; + +/* + * An expression is evaluated to yield an object, which + * has one of the following four basic types: + * - node-set + * - boolean + * - number + * - string + * + * @@ XPointer will add more types ! + */ + +typedef enum { + XPATH_UNDEFINED = 0, + XPATH_NODESET = 1, + XPATH_BOOLEAN = 2, + XPATH_NUMBER = 3, + XPATH_STRING = 4, +#ifdef LIBXML_XPTR_LOCS_ENABLED + XPATH_POINT = 5, + XPATH_RANGE = 6, + XPATH_LOCATIONSET = 7, +#endif + XPATH_USERS = 8, + XPATH_XSLT_TREE = 9 /* An XSLT value tree, non modifiable */ +} xmlXPathObjectType; + +#ifndef LIBXML_XPTR_LOCS_ENABLED +/** DOC_DISABLE */ +#define XPATH_POINT 5 +#define XPATH_RANGE 6 +#define XPATH_LOCATIONSET 7 +/** DOC_ENABLE */ +#endif + +typedef struct _xmlXPathObject xmlXPathObject; +typedef xmlXPathObject *xmlXPathObjectPtr; +struct _xmlXPathObject { + xmlXPathObjectType type; + xmlNodeSetPtr nodesetval; + int boolval; + double floatval; + xmlChar *stringval; + void *user; + int index; + void *user2; + int index2; +}; + +/** + * xmlXPathConvertFunc: + * @obj: an XPath object + * @type: the number of the target type + * + * A conversion function is associated to a type and used to cast + * the new type to primitive values. + * + * Returns -1 in case of error, 0 otherwise + */ +typedef int (*xmlXPathConvertFunc) (xmlXPathObjectPtr obj, int type); + +/* + * Extra type: a name and a conversion function. + */ + +typedef struct _xmlXPathType xmlXPathType; +typedef xmlXPathType *xmlXPathTypePtr; +struct _xmlXPathType { + const xmlChar *name; /* the type name */ + xmlXPathConvertFunc func; /* the conversion function */ +}; + +/* + * Extra variable: a name and a value. + */ + +typedef struct _xmlXPathVariable xmlXPathVariable; +typedef xmlXPathVariable *xmlXPathVariablePtr; +struct _xmlXPathVariable { + const xmlChar *name; /* the variable name */ + xmlXPathObjectPtr value; /* the value */ +}; + +/** + * xmlXPathEvalFunc: + * @ctxt: an XPath parser context + * @nargs: the number of arguments passed to the function + * + * An XPath evaluation function, the parameters are on the XPath context stack. + */ + +typedef void (*xmlXPathEvalFunc)(xmlXPathParserContextPtr ctxt, + int nargs); + +/* + * Extra function: a name and a evaluation function. + */ + +typedef struct _xmlXPathFunct xmlXPathFunct; +typedef xmlXPathFunct *xmlXPathFuncPtr; +struct _xmlXPathFunct { + const xmlChar *name; /* the function name */ + xmlXPathEvalFunc func; /* the evaluation function */ +}; + +/** + * xmlXPathAxisFunc: + * @ctxt: the XPath interpreter context + * @cur: the previous node being explored on that axis + * + * An axis traversal function. To traverse an axis, the engine calls + * the first time with cur == NULL and repeat until the function returns + * NULL indicating the end of the axis traversal. + * + * Returns the next node in that axis or NULL if at the end of the axis. + */ + +typedef xmlXPathObjectPtr (*xmlXPathAxisFunc) (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr cur); + +/* + * Extra axis: a name and an axis function. + */ + +typedef struct _xmlXPathAxis xmlXPathAxis; +typedef xmlXPathAxis *xmlXPathAxisPtr; +struct _xmlXPathAxis { + const xmlChar *name; /* the axis name */ + xmlXPathAxisFunc func; /* the search function */ +}; + +/** + * xmlXPathFunction: + * @ctxt: the XPath interprestation context + * @nargs: the number of arguments + * + * An XPath function. + * The arguments (if any) are popped out from the context stack + * and the result is pushed on the stack. + */ + +typedef void (*xmlXPathFunction) (xmlXPathParserContextPtr ctxt, int nargs); + +/* + * Function and Variable Lookup. + */ + +/** + * xmlXPathVariableLookupFunc: + * @ctxt: an XPath context + * @name: name of the variable + * @ns_uri: the namespace name hosting this variable + * + * Prototype for callbacks used to plug variable lookup in the XPath + * engine. + * + * Returns the XPath object value or NULL if not found. + */ +typedef xmlXPathObjectPtr (*xmlXPathVariableLookupFunc) (void *ctxt, + const xmlChar *name, + const xmlChar *ns_uri); + +/** + * xmlXPathFuncLookupFunc: + * @ctxt: an XPath context + * @name: name of the function + * @ns_uri: the namespace name hosting this function + * + * Prototype for callbacks used to plug function lookup in the XPath + * engine. + * + * Returns the XPath function or NULL if not found. + */ +typedef xmlXPathFunction (*xmlXPathFuncLookupFunc) (void *ctxt, + const xmlChar *name, + const xmlChar *ns_uri); + +/** + * xmlXPathFlags: + * Flags for XPath engine compilation and runtime + */ +/** + * XML_XPATH_CHECKNS: + * + * check namespaces at compilation + */ +#define XML_XPATH_CHECKNS (1<<0) +/** + * XML_XPATH_NOVAR: + * + * forbid variables in expression + */ +#define XML_XPATH_NOVAR (1<<1) + +/** + * xmlXPathContext: + * + * Expression evaluation occurs with respect to a context. + * he context consists of: + * - a node (the context node) + * - a node list (the context node list) + * - a set of variable bindings + * - a function library + * - the set of namespace declarations in scope for the expression + * Following the switch to hash tables, this need to be trimmed up at + * the next binary incompatible release. + * The node may be modified when the context is passed to libxml2 + * for an XPath evaluation so you may need to initialize it again + * before the next call. + */ + +struct _xmlXPathContext { + xmlDocPtr doc; /* The current document */ + xmlNodePtr node; /* The current node */ + + int nb_variables_unused; /* unused (hash table) */ + int max_variables_unused; /* unused (hash table) */ + xmlHashTablePtr varHash; /* Hash table of defined variables */ + + int nb_types; /* number of defined types */ + int max_types; /* max number of types */ + xmlXPathTypePtr types; /* Array of defined types */ + + int nb_funcs_unused; /* unused (hash table) */ + int max_funcs_unused; /* unused (hash table) */ + xmlHashTablePtr funcHash; /* Hash table of defined funcs */ + + int nb_axis; /* number of defined axis */ + int max_axis; /* max number of axis */ + xmlXPathAxisPtr axis; /* Array of defined axis */ + + /* the namespace nodes of the context node */ + xmlNsPtr *namespaces; /* Array of namespaces */ + int nsNr; /* number of namespace in scope */ + void *user; /* function to free */ + + /* extra variables */ + int contextSize; /* the context size */ + int proximityPosition; /* the proximity position */ + + /* extra stuff for XPointer */ + int xptr; /* is this an XPointer context? */ + xmlNodePtr here; /* for here() */ + xmlNodePtr origin; /* for origin() */ + + /* the set of namespace declarations in scope for the expression */ + xmlHashTablePtr nsHash; /* The namespaces hash table */ + xmlXPathVariableLookupFunc varLookupFunc;/* variable lookup func */ + void *varLookupData; /* variable lookup data */ + + /* Possibility to link in an extra item */ + void *extra; /* needed for XSLT */ + + /* The function name and URI when calling a function */ + const xmlChar *function; + const xmlChar *functionURI; + + /* function lookup function and data */ + xmlXPathFuncLookupFunc funcLookupFunc;/* function lookup func */ + void *funcLookupData; /* function lookup data */ + + /* temporary namespace lists kept for walking the namespace axis */ + xmlNsPtr *tmpNsList; /* Array of namespaces */ + int tmpNsNr; /* number of namespaces in scope */ + + /* error reporting mechanism */ + void *userData; /* user specific data block */ + xmlStructuredErrorFunc error; /* the callback in case of errors */ + xmlError lastError; /* the last error */ + xmlNodePtr debugNode; /* the source node XSLT */ + + /* dictionary */ + xmlDictPtr dict; /* dictionary if any */ + + int flags; /* flags to control compilation */ + + /* Cache for reusal of XPath objects */ + void *cache; + + /* Resource limits */ + unsigned long opLimit; + unsigned long opCount; + int depth; +}; + +/* + * The structure of a compiled expression form is not public. + */ + +typedef struct _xmlXPathCompExpr xmlXPathCompExpr; +typedef xmlXPathCompExpr *xmlXPathCompExprPtr; + +/** + * xmlXPathParserContext: + * + * An XPath parser context. It contains pure parsing information, + * an xmlXPathContext, and the stack of objects. + */ +struct _xmlXPathParserContext { + const xmlChar *cur; /* the current char being parsed */ + const xmlChar *base; /* the full expression */ + + int error; /* error code */ + + xmlXPathContextPtr context; /* the evaluation context */ + xmlXPathObjectPtr value; /* the current value */ + int valueNr; /* number of values stacked */ + int valueMax; /* max number of values stacked */ + xmlXPathObjectPtr *valueTab; /* stack of values */ + + xmlXPathCompExprPtr comp; /* the precompiled expression */ + int xptr; /* it this an XPointer expression */ + xmlNodePtr ancestor; /* used for walking preceding axis */ + + int valueFrame; /* always zero for compatibility */ +}; + +/************************************************************************ + * * + * Public API * + * * + ************************************************************************/ + +/** + * Objects and Nodesets handling + */ + +XMLPUBVAR double xmlXPathNAN; +XMLPUBVAR double xmlXPathPINF; +XMLPUBVAR double xmlXPathNINF; + +/* These macros may later turn into functions */ +/** + * xmlXPathNodeSetGetLength: + * @ns: a node-set + * + * Implement a functionality similar to the DOM NodeList.length. + * + * Returns the number of nodes in the node-set. + */ +#define xmlXPathNodeSetGetLength(ns) ((ns) ? (ns)->nodeNr : 0) +/** + * xmlXPathNodeSetItem: + * @ns: a node-set + * @index: index of a node in the set + * + * Implements a functionality similar to the DOM NodeList.item(). + * + * Returns the xmlNodePtr at the given @index in @ns or NULL if + * @index is out of range (0 to length-1) + */ +#define xmlXPathNodeSetItem(ns, index) \ + ((((ns) != NULL) && \ + ((index) >= 0) && ((index) < (ns)->nodeNr)) ? \ + (ns)->nodeTab[(index)] \ + : NULL) +/** + * xmlXPathNodeSetIsEmpty: + * @ns: a node-set + * + * Checks whether @ns is empty or not. + * + * Returns %TRUE if @ns is an empty node-set. + */ +#define xmlXPathNodeSetIsEmpty(ns) \ + (((ns) == NULL) || ((ns)->nodeNr == 0) || ((ns)->nodeTab == NULL)) + + +XMLPUBFUN void + xmlXPathFreeObject (xmlXPathObjectPtr obj); +XMLPUBFUN xmlNodeSetPtr + xmlXPathNodeSetCreate (xmlNodePtr val); +XMLPUBFUN void + xmlXPathFreeNodeSetList (xmlXPathObjectPtr obj); +XMLPUBFUN void + xmlXPathFreeNodeSet (xmlNodeSetPtr obj); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathObjectCopy (xmlXPathObjectPtr val); +XMLPUBFUN int + xmlXPathCmpNodes (xmlNodePtr node1, + xmlNodePtr node2); +/** + * Conversion functions to basic types. + */ +XMLPUBFUN int + xmlXPathCastNumberToBoolean (double val); +XMLPUBFUN int + xmlXPathCastStringToBoolean (const xmlChar * val); +XMLPUBFUN int + xmlXPathCastNodeSetToBoolean(xmlNodeSetPtr ns); +XMLPUBFUN int + xmlXPathCastToBoolean (xmlXPathObjectPtr val); + +XMLPUBFUN double + xmlXPathCastBooleanToNumber (int val); +XMLPUBFUN double + xmlXPathCastStringToNumber (const xmlChar * val); +XMLPUBFUN double + xmlXPathCastNodeToNumber (xmlNodePtr node); +XMLPUBFUN double + xmlXPathCastNodeSetToNumber (xmlNodeSetPtr ns); +XMLPUBFUN double + xmlXPathCastToNumber (xmlXPathObjectPtr val); + +XMLPUBFUN xmlChar * + xmlXPathCastBooleanToString (int val); +XMLPUBFUN xmlChar * + xmlXPathCastNumberToString (double val); +XMLPUBFUN xmlChar * + xmlXPathCastNodeToString (xmlNodePtr node); +XMLPUBFUN xmlChar * + xmlXPathCastNodeSetToString (xmlNodeSetPtr ns); +XMLPUBFUN xmlChar * + xmlXPathCastToString (xmlXPathObjectPtr val); + +XMLPUBFUN xmlXPathObjectPtr + xmlXPathConvertBoolean (xmlXPathObjectPtr val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathConvertNumber (xmlXPathObjectPtr val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathConvertString (xmlXPathObjectPtr val); + +/** + * Context handling. + */ +XMLPUBFUN xmlXPathContextPtr + xmlXPathNewContext (xmlDocPtr doc); +XMLPUBFUN void + xmlXPathFreeContext (xmlXPathContextPtr ctxt); +XMLPUBFUN int + xmlXPathContextSetCache(xmlXPathContextPtr ctxt, + int active, + int value, + int options); +/** + * Evaluation functions. + */ +XMLPUBFUN long + xmlXPathOrderDocElems (xmlDocPtr doc); +XMLPUBFUN int + xmlXPathSetContextNode (xmlNodePtr node, + xmlXPathContextPtr ctx); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNodeEval (xmlNodePtr node, + const xmlChar *str, + xmlXPathContextPtr ctx); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathEval (const xmlChar *str, + xmlXPathContextPtr ctx); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathEvalExpression (const xmlChar *str, + xmlXPathContextPtr ctxt); +XMLPUBFUN int + xmlXPathEvalPredicate (xmlXPathContextPtr ctxt, + xmlXPathObjectPtr res); +/** + * Separate compilation/evaluation entry points. + */ +XMLPUBFUN xmlXPathCompExprPtr + xmlXPathCompile (const xmlChar *str); +XMLPUBFUN xmlXPathCompExprPtr + xmlXPathCtxtCompile (xmlXPathContextPtr ctxt, + const xmlChar *str); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathCompiledEval (xmlXPathCompExprPtr comp, + xmlXPathContextPtr ctx); +XMLPUBFUN int + xmlXPathCompiledEvalToBoolean(xmlXPathCompExprPtr comp, + xmlXPathContextPtr ctxt); +XMLPUBFUN void + xmlXPathFreeCompExpr (xmlXPathCompExprPtr comp); +#endif /* LIBXML_XPATH_ENABLED */ +#if defined(LIBXML_XPATH_ENABLED) || defined(LIBXML_SCHEMAS_ENABLED) +XML_DEPRECATED +XMLPUBFUN void + xmlXPathInit (void); +XMLPUBFUN int + xmlXPathIsNaN (double val); +XMLPUBFUN int + xmlXPathIsInf (double val); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_XPATH_ENABLED or LIBXML_SCHEMAS_ENABLED*/ +#endif /* ! __XML_XPATH_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpathInternals.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpathInternals.h new file mode 100644 index 00000000..d1c90dff --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpathInternals.h @@ -0,0 +1,633 @@ +/* + * Summary: internal interfaces for XML Path Language implementation + * Description: internal interfaces for XML Path Language implementation + * used to build new modules on top of XPath like XPointer and + * XSLT + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XPATH_INTERNALS_H__ +#define __XML_XPATH_INTERNALS_H__ + +#include <stdio.h> +#include <libxml/xmlversion.h> +#include <libxml/xpath.h> + +#ifdef LIBXML_XPATH_ENABLED + +#ifdef __cplusplus +extern "C" { +#endif + +/************************************************************************ + * * + * Helpers * + * * + ************************************************************************/ + +/* + * Many of these macros may later turn into functions. They + * shouldn't be used in #ifdef's preprocessor instructions. + */ +/** + * xmlXPathSetError: + * @ctxt: an XPath parser context + * @err: an xmlXPathError code + * + * Raises an error. + */ +#define xmlXPathSetError(ctxt, err) \ + { xmlXPatherror((ctxt), __FILE__, __LINE__, (err)); \ + if ((ctxt) != NULL) (ctxt)->error = (err); } + +/** + * xmlXPathSetArityError: + * @ctxt: an XPath parser context + * + * Raises an XPATH_INVALID_ARITY error. + */ +#define xmlXPathSetArityError(ctxt) \ + xmlXPathSetError((ctxt), XPATH_INVALID_ARITY) + +/** + * xmlXPathSetTypeError: + * @ctxt: an XPath parser context + * + * Raises an XPATH_INVALID_TYPE error. + */ +#define xmlXPathSetTypeError(ctxt) \ + xmlXPathSetError((ctxt), XPATH_INVALID_TYPE) + +/** + * xmlXPathGetError: + * @ctxt: an XPath parser context + * + * Get the error code of an XPath context. + * + * Returns the context error. + */ +#define xmlXPathGetError(ctxt) ((ctxt)->error) + +/** + * xmlXPathCheckError: + * @ctxt: an XPath parser context + * + * Check if an XPath error was raised. + * + * Returns true if an error has been raised, false otherwise. + */ +#define xmlXPathCheckError(ctxt) ((ctxt)->error != XPATH_EXPRESSION_OK) + +/** + * xmlXPathGetDocument: + * @ctxt: an XPath parser context + * + * Get the document of an XPath context. + * + * Returns the context document. + */ +#define xmlXPathGetDocument(ctxt) ((ctxt)->context->doc) + +/** + * xmlXPathGetContextNode: + * @ctxt: an XPath parser context + * + * Get the context node of an XPath context. + * + * Returns the context node. + */ +#define xmlXPathGetContextNode(ctxt) ((ctxt)->context->node) + +XMLPUBFUN int + xmlXPathPopBoolean (xmlXPathParserContextPtr ctxt); +XMLPUBFUN double + xmlXPathPopNumber (xmlXPathParserContextPtr ctxt); +XMLPUBFUN xmlChar * + xmlXPathPopString (xmlXPathParserContextPtr ctxt); +XMLPUBFUN xmlNodeSetPtr + xmlXPathPopNodeSet (xmlXPathParserContextPtr ctxt); +XMLPUBFUN void * + xmlXPathPopExternal (xmlXPathParserContextPtr ctxt); + +/** + * xmlXPathReturnBoolean: + * @ctxt: an XPath parser context + * @val: a boolean + * + * Pushes the boolean @val on the context stack. + */ +#define xmlXPathReturnBoolean(ctxt, val) \ + valuePush((ctxt), xmlXPathNewBoolean(val)) + +/** + * xmlXPathReturnTrue: + * @ctxt: an XPath parser context + * + * Pushes true on the context stack. + */ +#define xmlXPathReturnTrue(ctxt) xmlXPathReturnBoolean((ctxt), 1) + +/** + * xmlXPathReturnFalse: + * @ctxt: an XPath parser context + * + * Pushes false on the context stack. + */ +#define xmlXPathReturnFalse(ctxt) xmlXPathReturnBoolean((ctxt), 0) + +/** + * xmlXPathReturnNumber: + * @ctxt: an XPath parser context + * @val: a double + * + * Pushes the double @val on the context stack. + */ +#define xmlXPathReturnNumber(ctxt, val) \ + valuePush((ctxt), xmlXPathNewFloat(val)) + +/** + * xmlXPathReturnString: + * @ctxt: an XPath parser context + * @str: a string + * + * Pushes the string @str on the context stack. + */ +#define xmlXPathReturnString(ctxt, str) \ + valuePush((ctxt), xmlXPathWrapString(str)) + +/** + * xmlXPathReturnEmptyString: + * @ctxt: an XPath parser context + * + * Pushes an empty string on the stack. + */ +#define xmlXPathReturnEmptyString(ctxt) \ + valuePush((ctxt), xmlXPathNewCString("")) + +/** + * xmlXPathReturnNodeSet: + * @ctxt: an XPath parser context + * @ns: a node-set + * + * Pushes the node-set @ns on the context stack. + */ +#define xmlXPathReturnNodeSet(ctxt, ns) \ + valuePush((ctxt), xmlXPathWrapNodeSet(ns)) + +/** + * xmlXPathReturnEmptyNodeSet: + * @ctxt: an XPath parser context + * + * Pushes an empty node-set on the context stack. + */ +#define xmlXPathReturnEmptyNodeSet(ctxt) \ + valuePush((ctxt), xmlXPathNewNodeSet(NULL)) + +/** + * xmlXPathReturnExternal: + * @ctxt: an XPath parser context + * @val: user data + * + * Pushes user data on the context stack. + */ +#define xmlXPathReturnExternal(ctxt, val) \ + valuePush((ctxt), xmlXPathWrapExternal(val)) + +/** + * xmlXPathStackIsNodeSet: + * @ctxt: an XPath parser context + * + * Check if the current value on the XPath stack is a node set or + * an XSLT value tree. + * + * Returns true if the current object on the stack is a node-set. + */ +#define xmlXPathStackIsNodeSet(ctxt) \ + (((ctxt)->value != NULL) \ + && (((ctxt)->value->type == XPATH_NODESET) \ + || ((ctxt)->value->type == XPATH_XSLT_TREE))) + +/** + * xmlXPathStackIsExternal: + * @ctxt: an XPath parser context + * + * Checks if the current value on the XPath stack is an external + * object. + * + * Returns true if the current object on the stack is an external + * object. + */ +#define xmlXPathStackIsExternal(ctxt) \ + ((ctxt->value != NULL) && (ctxt->value->type == XPATH_USERS)) + +/** + * xmlXPathEmptyNodeSet: + * @ns: a node-set + * + * Empties a node-set. + */ +#define xmlXPathEmptyNodeSet(ns) \ + { while ((ns)->nodeNr > 0) (ns)->nodeTab[--(ns)->nodeNr] = NULL; } + +/** + * CHECK_ERROR: + * + * Macro to return from the function if an XPath error was detected. + */ +#define CHECK_ERROR \ + if (ctxt->error != XPATH_EXPRESSION_OK) return + +/** + * CHECK_ERROR0: + * + * Macro to return 0 from the function if an XPath error was detected. + */ +#define CHECK_ERROR0 \ + if (ctxt->error != XPATH_EXPRESSION_OK) return(0) + +/** + * XP_ERROR: + * @X: the error code + * + * Macro to raise an XPath error and return. + */ +#define XP_ERROR(X) \ + { xmlXPathErr(ctxt, X); return; } + +/** + * XP_ERROR0: + * @X: the error code + * + * Macro to raise an XPath error and return 0. + */ +#define XP_ERROR0(X) \ + { xmlXPathErr(ctxt, X); return(0); } + +/** + * CHECK_TYPE: + * @typeval: the XPath type + * + * Macro to check that the value on top of the XPath stack is of a given + * type. + */ +#define CHECK_TYPE(typeval) \ + if ((ctxt->value == NULL) || (ctxt->value->type != typeval)) \ + XP_ERROR(XPATH_INVALID_TYPE) + +/** + * CHECK_TYPE0: + * @typeval: the XPath type + * + * Macro to check that the value on top of the XPath stack is of a given + * type. Return(0) in case of failure + */ +#define CHECK_TYPE0(typeval) \ + if ((ctxt->value == NULL) || (ctxt->value->type != typeval)) \ + XP_ERROR0(XPATH_INVALID_TYPE) + +/** + * CHECK_ARITY: + * @x: the number of expected args + * + * Macro to check that the number of args passed to an XPath function matches. + */ +#define CHECK_ARITY(x) \ + if (ctxt == NULL) return; \ + if (nargs != (x)) \ + XP_ERROR(XPATH_INVALID_ARITY); \ + if (ctxt->valueNr < (x)) \ + XP_ERROR(XPATH_STACK_ERROR); + +/** + * CAST_TO_STRING: + * + * Macro to try to cast the value on the top of the XPath stack to a string. + */ +#define CAST_TO_STRING \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_STRING)) \ + xmlXPathStringFunction(ctxt, 1); + +/** + * CAST_TO_NUMBER: + * + * Macro to try to cast the value on the top of the XPath stack to a number. + */ +#define CAST_TO_NUMBER \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_NUMBER)) \ + xmlXPathNumberFunction(ctxt, 1); + +/** + * CAST_TO_BOOLEAN: + * + * Macro to try to cast the value on the top of the XPath stack to a boolean. + */ +#define CAST_TO_BOOLEAN \ + if ((ctxt->value != NULL) && (ctxt->value->type != XPATH_BOOLEAN)) \ + xmlXPathBooleanFunction(ctxt, 1); + +/* + * Variable Lookup forwarding. + */ + +XMLPUBFUN void + xmlXPathRegisterVariableLookup (xmlXPathContextPtr ctxt, + xmlXPathVariableLookupFunc f, + void *data); + +/* + * Function Lookup forwarding. + */ + +XMLPUBFUN void + xmlXPathRegisterFuncLookup (xmlXPathContextPtr ctxt, + xmlXPathFuncLookupFunc f, + void *funcCtxt); + +/* + * Error reporting. + */ +XMLPUBFUN void + xmlXPatherror (xmlXPathParserContextPtr ctxt, + const char *file, + int line, + int no); + +XMLPUBFUN void + xmlXPathErr (xmlXPathParserContextPtr ctxt, + int error); + +#ifdef LIBXML_DEBUG_ENABLED +XMLPUBFUN void + xmlXPathDebugDumpObject (FILE *output, + xmlXPathObjectPtr cur, + int depth); +XMLPUBFUN void + xmlXPathDebugDumpCompExpr(FILE *output, + xmlXPathCompExprPtr comp, + int depth); +#endif +/** + * NodeSet handling. + */ +XMLPUBFUN int + xmlXPathNodeSetContains (xmlNodeSetPtr cur, + xmlNodePtr val); +XMLPUBFUN xmlNodeSetPtr + xmlXPathDifference (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); +XMLPUBFUN xmlNodeSetPtr + xmlXPathIntersection (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + +XMLPUBFUN xmlNodeSetPtr + xmlXPathDistinctSorted (xmlNodeSetPtr nodes); +XMLPUBFUN xmlNodeSetPtr + xmlXPathDistinct (xmlNodeSetPtr nodes); + +XMLPUBFUN int + xmlXPathHasSameNodes (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + +XMLPUBFUN xmlNodeSetPtr + xmlXPathNodeLeadingSorted (xmlNodeSetPtr nodes, + xmlNodePtr node); +XMLPUBFUN xmlNodeSetPtr + xmlXPathLeadingSorted (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); +XMLPUBFUN xmlNodeSetPtr + xmlXPathNodeLeading (xmlNodeSetPtr nodes, + xmlNodePtr node); +XMLPUBFUN xmlNodeSetPtr + xmlXPathLeading (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + +XMLPUBFUN xmlNodeSetPtr + xmlXPathNodeTrailingSorted (xmlNodeSetPtr nodes, + xmlNodePtr node); +XMLPUBFUN xmlNodeSetPtr + xmlXPathTrailingSorted (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); +XMLPUBFUN xmlNodeSetPtr + xmlXPathNodeTrailing (xmlNodeSetPtr nodes, + xmlNodePtr node); +XMLPUBFUN xmlNodeSetPtr + xmlXPathTrailing (xmlNodeSetPtr nodes1, + xmlNodeSetPtr nodes2); + + +/** + * Extending a context. + */ + +XMLPUBFUN int + xmlXPathRegisterNs (xmlXPathContextPtr ctxt, + const xmlChar *prefix, + const xmlChar *ns_uri); +XMLPUBFUN const xmlChar * + xmlXPathNsLookup (xmlXPathContextPtr ctxt, + const xmlChar *prefix); +XMLPUBFUN void + xmlXPathRegisteredNsCleanup (xmlXPathContextPtr ctxt); + +XMLPUBFUN int + xmlXPathRegisterFunc (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathFunction f); +XMLPUBFUN int + xmlXPathRegisterFuncNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathFunction f); +XMLPUBFUN int + xmlXPathRegisterVariable (xmlXPathContextPtr ctxt, + const xmlChar *name, + xmlXPathObjectPtr value); +XMLPUBFUN int + xmlXPathRegisterVariableNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri, + xmlXPathObjectPtr value); +XMLPUBFUN xmlXPathFunction + xmlXPathFunctionLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +XMLPUBFUN xmlXPathFunction + xmlXPathFunctionLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +XMLPUBFUN void + xmlXPathRegisteredFuncsCleanup (xmlXPathContextPtr ctxt); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathVariableLookup (xmlXPathContextPtr ctxt, + const xmlChar *name); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathVariableLookupNS (xmlXPathContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +XMLPUBFUN void + xmlXPathRegisteredVariablesCleanup(xmlXPathContextPtr ctxt); + +/** + * Utilities to extend XPath. + */ +XMLPUBFUN xmlXPathParserContextPtr + xmlXPathNewParserContext (const xmlChar *str, + xmlXPathContextPtr ctxt); +XMLPUBFUN void + xmlXPathFreeParserContext (xmlXPathParserContextPtr ctxt); + +/* TODO: remap to xmlXPathValuePop and Push. */ +XMLPUBFUN xmlXPathObjectPtr + valuePop (xmlXPathParserContextPtr ctxt); +XMLPUBFUN int + valuePush (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr value); + +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNewString (const xmlChar *val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNewCString (const char *val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathWrapString (xmlChar *val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathWrapCString (char * val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNewFloat (double val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNewBoolean (int val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNewNodeSet (xmlNodePtr val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNewValueTree (xmlNodePtr val); +XMLPUBFUN int + xmlXPathNodeSetAdd (xmlNodeSetPtr cur, + xmlNodePtr val); +XMLPUBFUN int + xmlXPathNodeSetAddUnique (xmlNodeSetPtr cur, + xmlNodePtr val); +XMLPUBFUN int + xmlXPathNodeSetAddNs (xmlNodeSetPtr cur, + xmlNodePtr node, + xmlNsPtr ns); +XMLPUBFUN void + xmlXPathNodeSetSort (xmlNodeSetPtr set); + +XMLPUBFUN void + xmlXPathRoot (xmlXPathParserContextPtr ctxt); +XMLPUBFUN void + xmlXPathEvalExpr (xmlXPathParserContextPtr ctxt); +XMLPUBFUN xmlChar * + xmlXPathParseName (xmlXPathParserContextPtr ctxt); +XMLPUBFUN xmlChar * + xmlXPathParseNCName (xmlXPathParserContextPtr ctxt); + +/* + * Existing functions. + */ +XMLPUBFUN double + xmlXPathStringEvalNumber (const xmlChar *str); +XMLPUBFUN int + xmlXPathEvaluatePredicateResult (xmlXPathParserContextPtr ctxt, + xmlXPathObjectPtr res); +XMLPUBFUN void + xmlXPathRegisterAllFunctions (xmlXPathContextPtr ctxt); +XMLPUBFUN xmlNodeSetPtr + xmlXPathNodeSetMerge (xmlNodeSetPtr val1, + xmlNodeSetPtr val2); +XMLPUBFUN void + xmlXPathNodeSetDel (xmlNodeSetPtr cur, + xmlNodePtr val); +XMLPUBFUN void + xmlXPathNodeSetRemove (xmlNodeSetPtr cur, + int val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathNewNodeSetList (xmlNodeSetPtr val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathWrapNodeSet (xmlNodeSetPtr val); +XMLPUBFUN xmlXPathObjectPtr + xmlXPathWrapExternal (void *val); + +XMLPUBFUN int xmlXPathEqualValues(xmlXPathParserContextPtr ctxt); +XMLPUBFUN int xmlXPathNotEqualValues(xmlXPathParserContextPtr ctxt); +XMLPUBFUN int xmlXPathCompareValues(xmlXPathParserContextPtr ctxt, int inf, int strict); +XMLPUBFUN void xmlXPathValueFlipSign(xmlXPathParserContextPtr ctxt); +XMLPUBFUN void xmlXPathAddValues(xmlXPathParserContextPtr ctxt); +XMLPUBFUN void xmlXPathSubValues(xmlXPathParserContextPtr ctxt); +XMLPUBFUN void xmlXPathMultValues(xmlXPathParserContextPtr ctxt); +XMLPUBFUN void xmlXPathDivValues(xmlXPathParserContextPtr ctxt); +XMLPUBFUN void xmlXPathModValues(xmlXPathParserContextPtr ctxt); + +XMLPUBFUN int xmlXPathIsNodeType(const xmlChar *name); + +/* + * Some of the axis navigation routines. + */ +XMLPUBFUN xmlNodePtr xmlXPathNextSelf(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextChild(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextDescendant(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextDescendantOrSelf(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextParent(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextAncestorOrSelf(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextFollowingSibling(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextFollowing(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextNamespace(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextAttribute(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextPreceding(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextAncestor(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +XMLPUBFUN xmlNodePtr xmlXPathNextPrecedingSibling(xmlXPathParserContextPtr ctxt, + xmlNodePtr cur); +/* + * The official core of XPath functions. + */ +XMLPUBFUN void xmlXPathLastFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathPositionFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathCountFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathIdFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathLocalNameFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathNamespaceURIFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathStringFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathStringLengthFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathConcatFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathContainsFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathStartsWithFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathSubstringFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathSubstringBeforeFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathSubstringAfterFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathNormalizeFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathTranslateFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathNotFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathTrueFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathFalseFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathLangFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathNumberFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathSumFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathFloorFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathCeilingFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathRoundFunction(xmlXPathParserContextPtr ctxt, int nargs); +XMLPUBFUN void xmlXPathBooleanFunction(xmlXPathParserContextPtr ctxt, int nargs); + +/** + * Really internal functions + */ +XMLPUBFUN void xmlXPathNodeSetFreeNs(xmlNsPtr ns); + +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_XPATH_ENABLED */ +#endif /* ! __XML_XPATH_INTERNALS_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpointer.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpointer.h new file mode 100644 index 00000000..a5260008 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxml/xpointer.h @@ -0,0 +1,138 @@ +/* + * Summary: API to handle XML Pointers + * Description: API to handle XML Pointers + * Base implementation was made accordingly to + * W3C Candidate Recommendation 7 June 2000 + * http://www.w3.org/TR/2000/CR-xptr-20000607 + * + * Added support for the element() scheme described in: + * W3C Proposed Recommendation 13 November 2002 + * http://www.w3.org/TR/2002/PR-xptr-element-20021113/ + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XPTR_H__ +#define __XML_XPTR_H__ + +#include <libxml/xmlversion.h> + +#ifdef LIBXML_XPTR_ENABLED + +#include <libxml/tree.h> +#include <libxml/xpath.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(LIBXML_XPTR_LOCS_ENABLED) +/* + * A Location Set + */ +typedef struct _xmlLocationSet xmlLocationSet; +typedef xmlLocationSet *xmlLocationSetPtr; +struct _xmlLocationSet { + int locNr; /* number of locations in the set */ + int locMax; /* size of the array as allocated */ + xmlXPathObjectPtr *locTab;/* array of locations */ +}; + +/* + * Handling of location sets. + */ + +XML_DEPRECATED +XMLPUBFUN xmlLocationSetPtr + xmlXPtrLocationSetCreate (xmlXPathObjectPtr val); +XML_DEPRECATED +XMLPUBFUN void + xmlXPtrFreeLocationSet (xmlLocationSetPtr obj); +XML_DEPRECATED +XMLPUBFUN xmlLocationSetPtr + xmlXPtrLocationSetMerge (xmlLocationSetPtr val1, + xmlLocationSetPtr val2); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewRange (xmlNodePtr start, + int startindex, + xmlNodePtr end, + int endindex); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewRangePoints (xmlXPathObjectPtr start, + xmlXPathObjectPtr end); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewRangeNodePoint (xmlNodePtr start, + xmlXPathObjectPtr end); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewRangePointNode (xmlXPathObjectPtr start, + xmlNodePtr end); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewRangeNodes (xmlNodePtr start, + xmlNodePtr end); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewLocationSetNodes (xmlNodePtr start, + xmlNodePtr end); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewLocationSetNodeSet(xmlNodeSetPtr set); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewRangeNodeObject (xmlNodePtr start, + xmlXPathObjectPtr end); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrNewCollapsedRange (xmlNodePtr start); +XML_DEPRECATED +XMLPUBFUN void + xmlXPtrLocationSetAdd (xmlLocationSetPtr cur, + xmlXPathObjectPtr val); +XML_DEPRECATED +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrWrapLocationSet (xmlLocationSetPtr val); +XML_DEPRECATED +XMLPUBFUN void + xmlXPtrLocationSetDel (xmlLocationSetPtr cur, + xmlXPathObjectPtr val); +XML_DEPRECATED +XMLPUBFUN void + xmlXPtrLocationSetRemove (xmlLocationSetPtr cur, + int val); +#endif /* defined(LIBXML_XPTR_LOCS_ENABLED) */ + +/* + * Functions. + */ +XMLPUBFUN xmlXPathContextPtr + xmlXPtrNewContext (xmlDocPtr doc, + xmlNodePtr here, + xmlNodePtr origin); +XMLPUBFUN xmlXPathObjectPtr + xmlXPtrEval (const xmlChar *str, + xmlXPathContextPtr ctx); + +#if defined(LIBXML_XPTR_LOCS_ENABLED) +XML_DEPRECATED +XMLPUBFUN void + xmlXPtrRangeToFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XML_DEPRECATED +XMLPUBFUN xmlNodePtr + xmlXPtrBuildNodeList (xmlXPathObjectPtr obj); +XML_DEPRECATED +XMLPUBFUN void + xmlXPtrEvalRangePredicate (xmlXPathParserContextPtr ctxt); +#endif /* defined(LIBXML_XPTR_LOCS_ENABLED) */ +#ifdef __cplusplus +} +#endif + +#endif /* LIBXML_XPTR_ENABLED */ +#endif /* __XML_XPTR_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/__init__.py b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/__init__.py new file mode 100644 index 00000000..e69de29b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/__init__.py diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/attributes.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/attributes.h new file mode 100644 index 00000000..d9b99a74 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/attributes.h @@ -0,0 +1,39 @@ +/* + * Summary: interface for the XSLT attribute handling + * Description: this module handles the specificities of attribute + * and attribute groups processing. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_ATTRIBUTES_H__ +#define __XML_XSLT_ATTRIBUTES_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +XSLTPUBFUN void XSLTCALL + xsltParseStylesheetAttributeSet (xsltStylesheetPtr style, + xmlNodePtr cur); +XSLTPUBFUN void XSLTCALL + xsltFreeAttributeSetsHashes (xsltStylesheetPtr style); +XSLTPUBFUN void XSLTCALL + xsltApplyAttributeSet (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + const xmlChar *attributes); +XSLTPUBFUN void XSLTCALL + xsltResolveStylesheetAttributeSet(xsltStylesheetPtr style); +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_ATTRIBUTES_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/documents.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/documents.h new file mode 100644 index 00000000..ae7c0ca2 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/documents.h @@ -0,0 +1,93 @@ +/* + * Summary: interface for the document handling + * Description: implements document loading and cache (multiple + * document() reference for the same resources must + * be equal. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_DOCUMENTS_H__ +#define __XML_XSLT_DOCUMENTS_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +XSLTPUBFUN xsltDocumentPtr XSLTCALL + xsltNewDocument (xsltTransformContextPtr ctxt, + xmlDocPtr doc); +XSLTPUBFUN xsltDocumentPtr XSLTCALL + xsltLoadDocument (xsltTransformContextPtr ctxt, + const xmlChar *URI); +XSLTPUBFUN xsltDocumentPtr XSLTCALL + xsltFindDocument (xsltTransformContextPtr ctxt, + xmlDocPtr doc); +XSLTPUBFUN void XSLTCALL + xsltFreeDocuments (xsltTransformContextPtr ctxt); + +XSLTPUBFUN xsltDocumentPtr XSLTCALL + xsltLoadStyleDocument (xsltStylesheetPtr style, + const xmlChar *URI); +XSLTPUBFUN xsltDocumentPtr XSLTCALL + xsltNewStyleDocument (xsltStylesheetPtr style, + xmlDocPtr doc); +XSLTPUBFUN void XSLTCALL + xsltFreeStyleDocuments (xsltStylesheetPtr style); + +/* + * Hooks for document loading + */ + +/** + * xsltLoadType: + * + * Enum defining the kind of loader requirement. + */ +typedef enum { + XSLT_LOAD_START = 0, /* loading for a top stylesheet */ + XSLT_LOAD_STYLESHEET = 1, /* loading for a stylesheet include/import */ + XSLT_LOAD_DOCUMENT = 2 /* loading document at transformation time */ +} xsltLoadType; + +/** + * xsltDocLoaderFunc: + * @URI: the URI of the document to load + * @dict: the dictionary to use when parsing that document + * @options: parsing options, a set of xmlParserOption + * @ctxt: the context, either a stylesheet or a transformation context + * @type: the xsltLoadType indicating the kind of loading required + * + * An xsltDocLoaderFunc is a signature for a function which can be + * registered to load document not provided by the compilation or + * transformation API themselve, for example when an xsl:import, + * xsl:include is found at compilation time or when a document() + * call is made at runtime. + * + * Returns the pointer to the document (which will be modified and + * freed by the engine later), or NULL in case of error. + */ +typedef xmlDocPtr (*xsltDocLoaderFunc) (const xmlChar *URI, + xmlDictPtr dict, + int options, + void *ctxt, + xsltLoadType type); + +XSLTPUBFUN void XSLTCALL + xsltSetLoaderFunc (xsltDocLoaderFunc f); + +/* the loader may be needed by extension libraries so it is exported */ +XSLTPUBVAR xsltDocLoaderFunc xsltDocDefaultLoader; + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_DOCUMENTS_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/extensions.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/extensions.h new file mode 100644 index 00000000..84d6aa44 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/extensions.h @@ -0,0 +1,262 @@ +/* + * Summary: interface for the extension support + * Description: This provide the API needed for simple and module + * extension support. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_EXTENSION_H__ +#define __XML_XSLT_EXTENSION_H__ + +#include <libxml/xpath.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Extension Modules API. + */ + +/** + * xsltInitGlobals: + * + * Initialize the global variables for extensions + * + */ + +XSLTPUBFUN void XSLTCALL + xsltInitGlobals (void); + +/** + * xsltStyleExtInitFunction: + * @ctxt: an XSLT stylesheet + * @URI: the namespace URI for the extension + * + * A function called at initialization time of an XSLT extension module. + * + * Returns a pointer to the module specific data for this transformation. + */ +typedef void * (*xsltStyleExtInitFunction) (xsltStylesheetPtr style, + const xmlChar *URI); + +/** + * xsltStyleExtShutdownFunction: + * @ctxt: an XSLT stylesheet + * @URI: the namespace URI for the extension + * @data: the data associated to this module + * + * A function called at shutdown time of an XSLT extension module. + */ +typedef void (*xsltStyleExtShutdownFunction) (xsltStylesheetPtr style, + const xmlChar *URI, + void *data); + +/** + * xsltExtInitFunction: + * @ctxt: an XSLT transformation context + * @URI: the namespace URI for the extension + * + * A function called at initialization time of an XSLT extension module. + * + * Returns a pointer to the module specific data for this transformation. + */ +typedef void * (*xsltExtInitFunction) (xsltTransformContextPtr ctxt, + const xmlChar *URI); + +/** + * xsltExtShutdownFunction: + * @ctxt: an XSLT transformation context + * @URI: the namespace URI for the extension + * @data: the data associated to this module + * + * A function called at shutdown time of an XSLT extension module. + */ +typedef void (*xsltExtShutdownFunction) (xsltTransformContextPtr ctxt, + const xmlChar *URI, + void *data); + +XSLTPUBFUN int XSLTCALL + xsltRegisterExtModule (const xmlChar *URI, + xsltExtInitFunction initFunc, + xsltExtShutdownFunction shutdownFunc); +XSLTPUBFUN int XSLTCALL + xsltRegisterExtModuleFull + (const xmlChar * URI, + xsltExtInitFunction initFunc, + xsltExtShutdownFunction shutdownFunc, + xsltStyleExtInitFunction styleInitFunc, + xsltStyleExtShutdownFunction styleShutdownFunc); + +XSLTPUBFUN int XSLTCALL + xsltUnregisterExtModule (const xmlChar * URI); + +XSLTPUBFUN void * XSLTCALL + xsltGetExtData (xsltTransformContextPtr ctxt, + const xmlChar *URI); + +XSLTPUBFUN void * XSLTCALL + xsltStyleGetExtData (xsltStylesheetPtr style, + const xmlChar *URI); +#ifdef XSLT_REFACTORED +XSLTPUBFUN void * XSLTCALL + xsltStyleStylesheetLevelGetExtData( + xsltStylesheetPtr style, + const xmlChar * URI); +#endif +XSLTPUBFUN void XSLTCALL + xsltShutdownCtxtExts (xsltTransformContextPtr ctxt); + +XSLTPUBFUN void XSLTCALL + xsltShutdownExts (xsltStylesheetPtr style); + +XSLTPUBFUN xsltTransformContextPtr XSLTCALL + xsltXPathGetTransformContext + (xmlXPathParserContextPtr ctxt); + +/* + * extension functions +*/ +XSLTPUBFUN int XSLTCALL + xsltRegisterExtModuleFunction + (const xmlChar *name, + const xmlChar *URI, + xmlXPathFunction function); +XSLTPUBFUN xmlXPathFunction XSLTCALL + xsltExtModuleFunctionLookup (const xmlChar *name, + const xmlChar *URI); +XSLTPUBFUN int XSLTCALL + xsltUnregisterExtModuleFunction + (const xmlChar *name, + const xmlChar *URI); + +/* + * extension elements + */ +typedef xsltElemPreCompPtr (*xsltPreComputeFunction) + (xsltStylesheetPtr style, + xmlNodePtr inst, + xsltTransformFunction function); + +XSLTPUBFUN xsltElemPreCompPtr XSLTCALL + xsltNewElemPreComp (xsltStylesheetPtr style, + xmlNodePtr inst, + xsltTransformFunction function); +XSLTPUBFUN void XSLTCALL + xsltInitElemPreComp (xsltElemPreCompPtr comp, + xsltStylesheetPtr style, + xmlNodePtr inst, + xsltTransformFunction function, + xsltElemPreCompDeallocator freeFunc); + +XSLTPUBFUN int XSLTCALL + xsltRegisterExtModuleElement + (const xmlChar *name, + const xmlChar *URI, + xsltPreComputeFunction precomp, + xsltTransformFunction transform); +XSLTPUBFUN xsltTransformFunction XSLTCALL + xsltExtElementLookup (xsltTransformContextPtr ctxt, + const xmlChar *name, + const xmlChar *URI); +XSLTPUBFUN xsltTransformFunction XSLTCALL + xsltExtModuleElementLookup + (const xmlChar *name, + const xmlChar *URI); +XSLTPUBFUN xsltPreComputeFunction XSLTCALL + xsltExtModuleElementPreComputeLookup + (const xmlChar *name, + const xmlChar *URI); +XSLTPUBFUN int XSLTCALL + xsltUnregisterExtModuleElement + (const xmlChar *name, + const xmlChar *URI); + +/* + * top-level elements + */ +typedef void (*xsltTopLevelFunction) (xsltStylesheetPtr style, + xmlNodePtr inst); + +XSLTPUBFUN int XSLTCALL + xsltRegisterExtModuleTopLevel + (const xmlChar *name, + const xmlChar *URI, + xsltTopLevelFunction function); +XSLTPUBFUN xsltTopLevelFunction XSLTCALL + xsltExtModuleTopLevelLookup + (const xmlChar *name, + const xmlChar *URI); +XSLTPUBFUN int XSLTCALL + xsltUnregisterExtModuleTopLevel + (const xmlChar *name, + const xmlChar *URI); + + +/* These 2 functions are deprecated for use within modules. */ +XSLTPUBFUN int XSLTCALL + xsltRegisterExtFunction (xsltTransformContextPtr ctxt, + const xmlChar *name, + const xmlChar *URI, + xmlXPathFunction function); +XSLTPUBFUN int XSLTCALL + xsltRegisterExtElement (xsltTransformContextPtr ctxt, + const xmlChar *name, + const xmlChar *URI, + xsltTransformFunction function); + +/* + * Extension Prefix handling API. + * Those are used by the XSLT (pre)processor. + */ + +XSLTPUBFUN int XSLTCALL + xsltRegisterExtPrefix (xsltStylesheetPtr style, + const xmlChar *prefix, + const xmlChar *URI); +XSLTPUBFUN int XSLTCALL + xsltCheckExtPrefix (xsltStylesheetPtr style, + const xmlChar *URI); +XSLTPUBFUN int XSLTCALL + xsltCheckExtURI (xsltStylesheetPtr style, + const xmlChar *URI); +XSLTPUBFUN int XSLTCALL + xsltInitCtxtExts (xsltTransformContextPtr ctxt); +XSLTPUBFUN void XSLTCALL + xsltFreeCtxtExts (xsltTransformContextPtr ctxt); +XSLTPUBFUN void XSLTCALL + xsltFreeExts (xsltStylesheetPtr style); + +XSLTPUBFUN xsltElemPreCompPtr XSLTCALL + xsltPreComputeExtModuleElement + (xsltStylesheetPtr style, + xmlNodePtr inst); +/* + * Extension Infos access. + * Used by exslt initialisation + */ + +XSLTPUBFUN xmlHashTablePtr XSLTCALL + xsltGetExtInfo (xsltStylesheetPtr style, + const xmlChar *URI); + +/** + * Test of the extension module API + */ +XSLTPUBFUN void XSLTCALL + xsltRegisterTestModule (void); +XSLTPUBFUN void XSLTCALL + xsltDebugDumpExtensions (FILE * output); + + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_EXTENSION_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/extra.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/extra.h new file mode 100644 index 00000000..e512fd03 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/extra.h @@ -0,0 +1,72 @@ +/* + * Summary: interface for the non-standard features + * Description: implement some extension outside the XSLT namespace + * but not EXSLT with is in a different library. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_EXTRA_H__ +#define __XML_XSLT_EXTRA_H__ + +#include <libxml/xpath.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XSLT_LIBXSLT_NAMESPACE: + * + * This is the libxslt namespace for specific extensions. + */ +#define XSLT_LIBXSLT_NAMESPACE ((xmlChar *) "http://xmlsoft.org/XSLT/namespace") + +/** + * XSLT_SAXON_NAMESPACE: + * + * This is Michael Kay's Saxon processor namespace for extensions. + */ +#define XSLT_SAXON_NAMESPACE ((xmlChar *) "http://icl.com/saxon") + +/** + * XSLT_XT_NAMESPACE: + * + * This is James Clark's XT processor namespace for extensions. + */ +#define XSLT_XT_NAMESPACE ((xmlChar *) "http://www.jclark.com/xt") + +/** + * XSLT_XALAN_NAMESPACE: + * + * This is the Apache project XALAN processor namespace for extensions. + */ +#define XSLT_XALAN_NAMESPACE ((xmlChar *) \ + "org.apache.xalan.xslt.extensions.Redirect") + + +XSLTPUBFUN void XSLTCALL + xsltFunctionNodeSet (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltDebug (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); + + +XSLTPUBFUN void XSLTCALL + xsltRegisterExtras (xsltTransformContextPtr ctxt); +XSLTPUBFUN void XSLTCALL + xsltRegisterAllExtras (void); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_EXTRA_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/functions.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/functions.h new file mode 100644 index 00000000..5455b7f4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/functions.h @@ -0,0 +1,78 @@ +/* + * Summary: interface for the XSLT functions not from XPath + * Description: a set of extra functions coming from XSLT but not in XPath + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard and Bjorn Reese <breese@users.sourceforge.net> + */ + +#ifndef __XML_XSLT_FUNCTIONS_H__ +#define __XML_XSLT_FUNCTIONS_H__ + +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XSLT_REGISTER_FUNCTION_LOOKUP: + * + * Registering macro, not general purpose at all but used in different modules. + */ +#define XSLT_REGISTER_FUNCTION_LOOKUP(ctxt) \ + xmlXPathRegisterFuncLookup((ctxt)->xpathCtxt, \ + xsltXPathFunctionLookup, \ + (void *)(ctxt->xpathCtxt)); + +XSLTPUBFUN xmlXPathFunction XSLTCALL + xsltXPathFunctionLookup (void *vctxt, + const xmlChar *name, + const xmlChar *ns_uri); + +/* + * Interfaces for the functions implementations. + */ + +XSLTPUBFUN void XSLTCALL + xsltDocumentFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltKeyFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltUnparsedEntityURIFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltFormatNumberFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltGenerateIdFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltSystemPropertyFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltElementAvailableFunction (xmlXPathParserContextPtr ctxt, + int nargs); +XSLTPUBFUN void XSLTCALL + xsltFunctionAvailableFunction (xmlXPathParserContextPtr ctxt, + int nargs); + +/* + * And the registration + */ + +XSLTPUBFUN void XSLTCALL + xsltRegisterAllFunctions (xmlXPathContextPtr ctxt); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_FUNCTIONS_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/imports.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/imports.h new file mode 100644 index 00000000..95e44e51 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/imports.h @@ -0,0 +1,75 @@ +/* + * Summary: interface for the XSLT import support + * Description: macros and fuctions needed to implement and + * access the import tree + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_IMPORTS_H__ +#define __XML_XSLT_IMPORTS_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XSLT_GET_IMPORT_PTR: + * + * A macro to import pointers from the stylesheet cascading order. + */ +#define XSLT_GET_IMPORT_PTR(res, style, name) { \ + xsltStylesheetPtr st = style; \ + res = NULL; \ + while (st != NULL) { \ + if (st->name != NULL) { res = st->name; break; } \ + st = xsltNextImport(st); \ + }} + +/** + * XSLT_GET_IMPORT_INT: + * + * A macro to import intergers from the stylesheet cascading order. + */ +#define XSLT_GET_IMPORT_INT(res, style, name) { \ + xsltStylesheetPtr st = style; \ + res = -1; \ + while (st != NULL) { \ + if (st->name != -1) { res = st->name; break; } \ + st = xsltNextImport(st); \ + }} + +/* + * Module interfaces + */ +XSLTPUBFUN int XSLTCALL + xsltParseStylesheetImport(xsltStylesheetPtr style, + xmlNodePtr cur); +XSLTPUBFUN int XSLTCALL + xsltParseStylesheetInclude + (xsltStylesheetPtr style, + xmlNodePtr cur); +XSLTPUBFUN xsltStylesheetPtr XSLTCALL + xsltNextImport (xsltStylesheetPtr style); +XSLTPUBFUN int XSLTCALL + xsltNeedElemSpaceHandling(xsltTransformContextPtr ctxt); +XSLTPUBFUN int XSLTCALL + xsltFindElemSpaceHandling(xsltTransformContextPtr ctxt, + xmlNodePtr node); +XSLTPUBFUN xsltTemplatePtr XSLTCALL + xsltFindTemplate (xsltTransformContextPtr ctxt, + const xmlChar *name, + const xmlChar *nameURI); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_IMPORTS_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/keys.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/keys.h new file mode 100644 index 00000000..757d1224 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/keys.h @@ -0,0 +1,53 @@ +/* + * Summary: interface for the key matching used in key() and template matches. + * Description: implementation of the key mechanims. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_KEY_H__ +#define __XML_XSLT_KEY_H__ + +#include <libxml/xpath.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * NODE_IS_KEYED: + * + * check for bit 15 set + */ +#define NODE_IS_KEYED (1 >> 15) + +XSLTPUBFUN int XSLTCALL + xsltAddKey (xsltStylesheetPtr style, + const xmlChar *name, + const xmlChar *nameURI, + const xmlChar *match, + const xmlChar *use, + xmlNodePtr inst); +XSLTPUBFUN xmlNodeSetPtr XSLTCALL + xsltGetKey (xsltTransformContextPtr ctxt, + const xmlChar *name, + const xmlChar *nameURI, + const xmlChar *value); +XSLTPUBFUN void XSLTCALL + xsltInitCtxtKeys (xsltTransformContextPtr ctxt, + xsltDocumentPtr doc); +XSLTPUBFUN void XSLTCALL + xsltFreeKeys (xsltStylesheetPtr style); +XSLTPUBFUN void XSLTCALL + xsltFreeDocumentKeys (xsltDocumentPtr doc); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/namespaces.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/namespaces.h new file mode 100644 index 00000000..fa2d3b4c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/namespaces.h @@ -0,0 +1,68 @@ +/* + * Summary: interface for the XSLT namespace handling + * Description: set of function easing the processing and generation + * of namespace nodes in XSLT. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_NAMESPACES_H__ +#define __XML_XSLT_NAMESPACES_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Used within nsAliases hashtable when the default namespace is required + * but it's not been explicitly defined + */ +/** + * UNDEFINED_DEFAULT_NS: + * + * Special value for undefined namespace, internal + */ +#define UNDEFINED_DEFAULT_NS (const xmlChar *) -1L + +XSLTPUBFUN void XSLTCALL + xsltNamespaceAlias (xsltStylesheetPtr style, + xmlNodePtr node); +XSLTPUBFUN xmlNsPtr XSLTCALL + xsltGetNamespace (xsltTransformContextPtr ctxt, + xmlNodePtr cur, + xmlNsPtr ns, + xmlNodePtr out); +XSLTPUBFUN xmlNsPtr XSLTCALL + xsltGetPlainNamespace (xsltTransformContextPtr ctxt, + xmlNodePtr cur, + xmlNsPtr ns, + xmlNodePtr out); +XSLTPUBFUN xmlNsPtr XSLTCALL + xsltGetSpecialNamespace (xsltTransformContextPtr ctxt, + xmlNodePtr cur, + const xmlChar *URI, + const xmlChar *prefix, + xmlNodePtr out); +XSLTPUBFUN xmlNsPtr XSLTCALL + xsltCopyNamespace (xsltTransformContextPtr ctxt, + xmlNodePtr elem, + xmlNsPtr ns); +XSLTPUBFUN xmlNsPtr XSLTCALL + xsltCopyNamespaceList (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNsPtr cur); +XSLTPUBFUN void XSLTCALL + xsltFreeNamespaceAliasHashes + (xsltStylesheetPtr style); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_NAMESPACES_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/numbersInternals.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/numbersInternals.h new file mode 100644 index 00000000..85245928 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/numbersInternals.h @@ -0,0 +1,73 @@ +/* + * Summary: Implementation of the XSLT number functions + * Description: Implementation of the XSLT number functions + * + * Copy: See Copyright for the status of this software. + * + * Author: Bjorn Reese <breese@users.sourceforge.net> and Daniel Veillard + */ + +#ifndef __XML_XSLT_NUMBERSINTERNALS_H__ +#define __XML_XSLT_NUMBERSINTERNALS_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct _xsltCompMatch; + +/** + * xsltNumberData: + * + * This data structure is just a wrapper to pass xsl:number data in. + */ +typedef struct _xsltNumberData xsltNumberData; +typedef xsltNumberData *xsltNumberDataPtr; + +struct _xsltNumberData { + const xmlChar *level; + const xmlChar *count; + const xmlChar *from; + const xmlChar *value; + const xmlChar *format; + int has_format; + int digitsPerGroup; + int groupingCharacter; + int groupingCharacterLen; + xmlDocPtr doc; + xmlNodePtr node; + struct _xsltCompMatch *countPat; + struct _xsltCompMatch *fromPat; + + /* + * accelerators + */ +}; + +/** + * xsltFormatNumberInfo,: + * + * This data structure lists the various parameters needed to format numbers. + */ +typedef struct _xsltFormatNumberInfo xsltFormatNumberInfo; +typedef xsltFormatNumberInfo *xsltFormatNumberInfoPtr; + +struct _xsltFormatNumberInfo { + int integer_hash; /* Number of '#' in integer part */ + int integer_digits; /* Number of '0' in integer part */ + int frac_digits; /* Number of '0' in fractional part */ + int frac_hash; /* Number of '#' in fractional part */ + int group; /* Number of chars per display 'group' */ + int multiplier; /* Scaling for percent or permille */ + char add_decimal; /* Flag for whether decimal point appears in pattern */ + char is_multiplier_set; /* Flag to catch multiple occurences of percent/permille */ + char is_negative_pattern;/* Flag for processing -ve prefix/suffix */ +}; + +#ifdef __cplusplus +} +#endif +#endif /* __XML_XSLT_NUMBERSINTERNALS_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/pattern.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/pattern.h new file mode 100644 index 00000000..a0991c0c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/pattern.h @@ -0,0 +1,84 @@ +/* + * Summary: interface for the pattern matching used in template matches. + * Description: the implementation of the lookup of the right template + * for a given node must be really fast in order to keep + * decent performances. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_PATTERN_H__ +#define __XML_XSLT_PATTERN_H__ + +#include "xsltInternals.h" +#include "xsltexports.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xsltCompMatch: + * + * Data structure used for the implementation of patterns. + * It is kept private (in pattern.c). + */ +typedef struct _xsltCompMatch xsltCompMatch; +typedef xsltCompMatch *xsltCompMatchPtr; + +/* + * Pattern related interfaces. + */ + +XSLTPUBFUN xsltCompMatchPtr XSLTCALL + xsltCompilePattern (const xmlChar *pattern, + xmlDocPtr doc, + xmlNodePtr node, + xsltStylesheetPtr style, + xsltTransformContextPtr runtime); +XSLTPUBFUN void XSLTCALL + xsltFreeCompMatchList (xsltCompMatchPtr comp); +XSLTPUBFUN int XSLTCALL + xsltTestCompMatchList (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xsltCompMatchPtr comp); +XSLTPUBFUN void XSLTCALL + xsltCompMatchClearCache (xsltTransformContextPtr ctxt, + xsltCompMatchPtr comp); +XSLTPUBFUN void XSLTCALL + xsltNormalizeCompSteps (void *payload, + void *data, + const xmlChar *name); + +/* + * Template related interfaces. + */ +XSLTPUBFUN int XSLTCALL + xsltAddTemplate (xsltStylesheetPtr style, + xsltTemplatePtr cur, + const xmlChar *mode, + const xmlChar *modeURI); +XSLTPUBFUN xsltTemplatePtr XSLTCALL + xsltGetTemplate (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xsltStylesheetPtr style); +XSLTPUBFUN void XSLTCALL + xsltFreeTemplateHashes (xsltStylesheetPtr style); +XSLTPUBFUN void XSLTCALL + xsltCleanupTemplates (xsltStylesheetPtr style); + +#if 0 +int xsltMatchPattern (xsltTransformContextPtr ctxt, + xmlNodePtr node, + const xmlChar *pattern, + xmlDocPtr ctxtdoc, + xmlNodePtr ctxtnode); +#endif +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_PATTERN_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/preproc.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/preproc.h new file mode 100644 index 00000000..2a2fc7e4 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/preproc.h @@ -0,0 +1,43 @@ +/* + * Summary: precomputing stylesheets + * Description: this is the compilation phase, where most of the + * stylesheet is "compiled" into faster to use data. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_PRECOMP_H__ +#define __XML_XSLT_PRECOMP_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Interfaces + */ +XSLTPUBVAR const xmlChar *xsltExtMarker; + +XSLTPUBFUN xsltElemPreCompPtr XSLTCALL + xsltDocumentComp (xsltStylesheetPtr style, + xmlNodePtr inst, + xsltTransformFunction function); + +XSLTPUBFUN void XSLTCALL + xsltStylePreCompute (xsltStylesheetPtr style, + xmlNodePtr inst); +XSLTPUBFUN void XSLTCALL + xsltFreeStylePreComps (xsltStylesheetPtr style); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_PRECOMP_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/security.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/security.h new file mode 100644 index 00000000..bab5c8c6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/security.h @@ -0,0 +1,104 @@ +/* + * Summary: interface for the libxslt security framework + * Description: the libxslt security framework allow to restrict + * the access to new resources (file or URL) from + * the stylesheet at runtime. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_SECURITY_H__ +#define __XML_XSLT_SECURITY_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * xsltSecurityPref: + * + * structure to indicate the preferences for security in the XSLT + * transformation. + */ +typedef struct _xsltSecurityPrefs xsltSecurityPrefs; +typedef xsltSecurityPrefs *xsltSecurityPrefsPtr; + +/** + * xsltSecurityOption: + * + * the set of option that can be configured + */ +typedef enum { + XSLT_SECPREF_READ_FILE = 1, + XSLT_SECPREF_WRITE_FILE, + XSLT_SECPREF_CREATE_DIRECTORY, + XSLT_SECPREF_READ_NETWORK, + XSLT_SECPREF_WRITE_NETWORK +} xsltSecurityOption; + +/** + * xsltSecurityCheck: + * + * User provided function to check the value of a string like a file + * path or an URL ... + */ +typedef int (*xsltSecurityCheck) (xsltSecurityPrefsPtr sec, + xsltTransformContextPtr ctxt, + const char *value); + +/* + * Module interfaces + */ +XSLTPUBFUN xsltSecurityPrefsPtr XSLTCALL + xsltNewSecurityPrefs (void); +XSLTPUBFUN void XSLTCALL + xsltFreeSecurityPrefs (xsltSecurityPrefsPtr sec); +XSLTPUBFUN int XSLTCALL + xsltSetSecurityPrefs (xsltSecurityPrefsPtr sec, + xsltSecurityOption option, + xsltSecurityCheck func); +XSLTPUBFUN xsltSecurityCheck XSLTCALL + xsltGetSecurityPrefs (xsltSecurityPrefsPtr sec, + xsltSecurityOption option); + +XSLTPUBFUN void XSLTCALL + xsltSetDefaultSecurityPrefs (xsltSecurityPrefsPtr sec); +XSLTPUBFUN xsltSecurityPrefsPtr XSLTCALL + xsltGetDefaultSecurityPrefs (void); + +XSLTPUBFUN int XSLTCALL + xsltSetCtxtSecurityPrefs (xsltSecurityPrefsPtr sec, + xsltTransformContextPtr ctxt); + +XSLTPUBFUN int XSLTCALL + xsltSecurityAllow (xsltSecurityPrefsPtr sec, + xsltTransformContextPtr ctxt, + const char *value); +XSLTPUBFUN int XSLTCALL + xsltSecurityForbid (xsltSecurityPrefsPtr sec, + xsltTransformContextPtr ctxt, + const char *value); +/* + * internal interfaces + */ +XSLTPUBFUN int XSLTCALL + xsltCheckWrite (xsltSecurityPrefsPtr sec, + xsltTransformContextPtr ctxt, + const xmlChar *URL); +XSLTPUBFUN int XSLTCALL + xsltCheckRead (xsltSecurityPrefsPtr sec, + xsltTransformContextPtr ctxt, + const xmlChar *URL); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_SECURITY_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/templates.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/templates.h new file mode 100644 index 00000000..84a9de4d --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/templates.h @@ -0,0 +1,77 @@ +/* + * Summary: interface for the template processing + * Description: This set of routine encapsulates XPath calls + * and Attribute Value Templates evaluation. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_TEMPLATES_H__ +#define __XML_XSLT_TEMPLATES_H__ + +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +XSLTPUBFUN int XSLTCALL + xsltEvalXPathPredicate (xsltTransformContextPtr ctxt, + xmlXPathCompExprPtr comp, + xmlNsPtr *nsList, + int nsNr); +XSLTPUBFUN xmlChar * XSLTCALL + xsltEvalTemplateString (xsltTransformContextPtr ctxt, + xmlNodePtr contextNode, + xmlNodePtr inst); +XSLTPUBFUN xmlChar * XSLTCALL + xsltEvalAttrValueTemplate (xsltTransformContextPtr ctxt, + xmlNodePtr node, + const xmlChar *name, + const xmlChar *ns); +XSLTPUBFUN const xmlChar * XSLTCALL + xsltEvalStaticAttrValueTemplate (xsltStylesheetPtr style, + xmlNodePtr node, + const xmlChar *name, + const xmlChar *ns, + int *found); + +/* TODO: this is obviously broken ... the namespaces should be passed too ! */ +XSLTPUBFUN xmlChar * XSLTCALL + xsltEvalXPathString (xsltTransformContextPtr ctxt, + xmlXPathCompExprPtr comp); +XSLTPUBFUN xmlChar * XSLTCALL + xsltEvalXPathStringNs (xsltTransformContextPtr ctxt, + xmlXPathCompExprPtr comp, + int nsNr, + xmlNsPtr *nsList); + +XSLTPUBFUN xmlNodePtr * XSLTCALL + xsltTemplateProcess (xsltTransformContextPtr ctxt, + xmlNodePtr node); +XSLTPUBFUN xmlAttrPtr XSLTCALL + xsltAttrListTemplateProcess (xsltTransformContextPtr ctxt, + xmlNodePtr target, + xmlAttrPtr cur); +XSLTPUBFUN xmlAttrPtr XSLTCALL + xsltAttrTemplateProcess (xsltTransformContextPtr ctxt, + xmlNodePtr target, + xmlAttrPtr attr); +XSLTPUBFUN xmlChar * XSLTCALL + xsltAttrTemplateValueProcess (xsltTransformContextPtr ctxt, + const xmlChar* attr); +XSLTPUBFUN xmlChar * XSLTCALL + xsltAttrTemplateValueProcessNode(xsltTransformContextPtr ctxt, + const xmlChar* str, + xmlNodePtr node); +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_TEMPLATES_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/transform.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/transform.h new file mode 100644 index 00000000..5a6f7959 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/transform.h @@ -0,0 +1,207 @@ +/* + * Summary: the XSLT engine transformation part. + * Description: This module implements the bulk of the actual + * transformation processing. Most of the xsl: element + * constructs are implemented in this module. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_TRANSFORM_H__ +#define __XML_XSLT_TRANSFORM_H__ + +#include <libxml/parser.h> +#include <libxml/xmlIO.h> +#include "xsltexports.h" +#include <libxslt/xsltInternals.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XInclude default processing. + */ +XSLTPUBFUN void XSLTCALL + xsltSetXIncludeDefault (int xinclude); +XSLTPUBFUN int XSLTCALL + xsltGetXIncludeDefault (void); + +/** + * Export context to users. + */ +XSLTPUBFUN xsltTransformContextPtr XSLTCALL + xsltNewTransformContext (xsltStylesheetPtr style, + xmlDocPtr doc); + +XSLTPUBFUN void XSLTCALL + xsltFreeTransformContext(xsltTransformContextPtr ctxt); + +XSLTPUBFUN xmlDocPtr XSLTCALL + xsltApplyStylesheetUser (xsltStylesheetPtr style, + xmlDocPtr doc, + const char **params, + const char *output, + FILE * profile, + xsltTransformContextPtr userCtxt); +XSLTPUBFUN void XSLTCALL + xsltProcessOneNode (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xsltStackElemPtr params); +/** + * Private Interfaces. + */ +XSLTPUBFUN void XSLTCALL + xsltApplyStripSpaces (xsltTransformContextPtr ctxt, + xmlNodePtr node); +XSLTPUBFUN xmlDocPtr XSLTCALL + xsltApplyStylesheet (xsltStylesheetPtr style, + xmlDocPtr doc, + const char **params); +XSLTPUBFUN xmlDocPtr XSLTCALL + xsltProfileStylesheet (xsltStylesheetPtr style, + xmlDocPtr doc, + const char **params, + FILE * output); +XSLTPUBFUN int XSLTCALL + xsltRunStylesheet (xsltStylesheetPtr style, + xmlDocPtr doc, + const char **params, + const char *output, + xmlSAXHandlerPtr SAX, + xmlOutputBufferPtr IObuf); +XSLTPUBFUN int XSLTCALL + xsltRunStylesheetUser (xsltStylesheetPtr style, + xmlDocPtr doc, + const char **params, + const char *output, + xmlSAXHandlerPtr SAX, + xmlOutputBufferPtr IObuf, + FILE * profile, + xsltTransformContextPtr userCtxt); +XSLTPUBFUN void XSLTCALL + xsltApplyOneTemplate (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr list, + xsltTemplatePtr templ, + xsltStackElemPtr params); +XSLTPUBFUN void XSLTCALL + xsltDocumentElem (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltSort (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltCopy (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltText (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltElement (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltComment (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltAttribute (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltProcessingInstruction(xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltCopyOf (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltValueOf (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltNumber (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltApplyImports (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltCallTemplate (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltApplyTemplates (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltChoose (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltIf (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltForEach (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); +XSLTPUBFUN void XSLTCALL + xsltRegisterAllElement (xsltTransformContextPtr ctxt); + +XSLTPUBFUN xmlNodePtr XSLTCALL + xsltCopyTextString (xsltTransformContextPtr ctxt, + xmlNodePtr target, + const xmlChar *string, + int noescape); + +/* Following 2 functions needed for libexslt/functions.c */ +XSLTPUBFUN void XSLTCALL + xsltLocalVariablePop (xsltTransformContextPtr ctxt, + int limitNr, + int level); +XSLTPUBFUN int XSLTCALL + xsltLocalVariablePush (xsltTransformContextPtr ctxt, + xsltStackElemPtr variable, + int level); +/* + * Hook for the debugger if activated. + */ +XSLTPUBFUN void XSLTCALL + xslHandleDebugger (xmlNodePtr cur, + xmlNodePtr node, + xsltTemplatePtr templ, + xsltTransformContextPtr ctxt); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_TRANSFORM_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/variables.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/variables.h new file mode 100644 index 00000000..e2adee0f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/variables.h @@ -0,0 +1,118 @@ +/* + * Summary: interface for the variable matching and lookup. + * Description: interface for the variable matching and lookup. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_VARIABLES_H__ +#define __XML_XSLT_VARIABLES_H__ + +#include <libxml/xpath.h> +#include <libxml/xpathInternals.h> +#include "xsltexports.h" +#include "xsltInternals.h" +#include "functions.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * XSLT_REGISTER_VARIABLE_LOOKUP: + * + * Registering macro, not general purpose at all but used in different modules. + */ + +#define XSLT_REGISTER_VARIABLE_LOOKUP(ctxt) \ + xmlXPathRegisterVariableLookup((ctxt)->xpathCtxt, \ + xsltXPathVariableLookup, (void *)(ctxt)); \ + xsltRegisterAllFunctions((ctxt)->xpathCtxt); \ + xsltRegisterAllElement(ctxt); \ + (ctxt)->xpathCtxt->extra = ctxt + +/* + * Flags for memory management of RVTs + */ + +/** + * XSLT_RVT_LOCAL: + * + * RVT is destroyed after the current instructions ends. + */ +#define XSLT_RVT_LOCAL 1 + +/** + * XSLT_RVT_FUNC_RESULT: + * + * RVT is part of results returned with func:result. The RVT won't be + * destroyed after exiting a template and will be reset to XSLT_RVT_LOCAL or + * XSLT_RVT_VARIABLE in the template that receives the return value. + */ +#define XSLT_RVT_FUNC_RESULT 2 + +/** + * XSLT_RVT_GLOBAL: + * + * RVT is part of a global variable. + */ +#define XSLT_RVT_GLOBAL 3 + +/* + * Interfaces for the variable module. + */ + +XSLTPUBFUN int XSLTCALL + xsltEvalGlobalVariables (xsltTransformContextPtr ctxt); +XSLTPUBFUN int XSLTCALL + xsltEvalUserParams (xsltTransformContextPtr ctxt, + const char **params); +XSLTPUBFUN int XSLTCALL + xsltQuoteUserParams (xsltTransformContextPtr ctxt, + const char **params); +XSLTPUBFUN int XSLTCALL + xsltEvalOneUserParam (xsltTransformContextPtr ctxt, + const xmlChar * name, + const xmlChar * value); +XSLTPUBFUN int XSLTCALL + xsltQuoteOneUserParam (xsltTransformContextPtr ctxt, + const xmlChar * name, + const xmlChar * value); + +XSLTPUBFUN void XSLTCALL + xsltParseGlobalVariable (xsltStylesheetPtr style, + xmlNodePtr cur); +XSLTPUBFUN void XSLTCALL + xsltParseGlobalParam (xsltStylesheetPtr style, + xmlNodePtr cur); +XSLTPUBFUN void XSLTCALL + xsltParseStylesheetVariable (xsltTransformContextPtr ctxt, + xmlNodePtr cur); +XSLTPUBFUN void XSLTCALL + xsltParseStylesheetParam (xsltTransformContextPtr ctxt, + xmlNodePtr cur); +XSLTPUBFUN xsltStackElemPtr XSLTCALL + xsltParseStylesheetCallerParam (xsltTransformContextPtr ctxt, + xmlNodePtr cur); +XSLTPUBFUN int XSLTCALL + xsltAddStackElemList (xsltTransformContextPtr ctxt, + xsltStackElemPtr elems); +XSLTPUBFUN void XSLTCALL + xsltFreeGlobalVariables (xsltTransformContextPtr ctxt); +XSLTPUBFUN xmlXPathObjectPtr XSLTCALL + xsltVariableLookup (xsltTransformContextPtr ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +XSLTPUBFUN xmlXPathObjectPtr XSLTCALL + xsltXPathVariableLookup (void *ctxt, + const xmlChar *name, + const xmlChar *ns_uri); +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_VARIABLES_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xslt.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xslt.h new file mode 100644 index 00000000..02f491a5 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xslt.h @@ -0,0 +1,110 @@ +/* + * Summary: Interfaces, constants and types related to the XSLT engine + * Description: Interfaces, constants and types related to the XSLT engine + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_H__ +#define __XML_XSLT_H__ + +#include <libxml/tree.h> +#include "xsltexports.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XSLT_DEFAULT_VERSION: + * + * The default version of XSLT supported. + */ +#define XSLT_DEFAULT_VERSION "1.0" + +/** + * XSLT_DEFAULT_VENDOR: + * + * The XSLT "vendor" string for this processor. + */ +#define XSLT_DEFAULT_VENDOR "libxslt" + +/** + * XSLT_DEFAULT_URL: + * + * The XSLT "vendor" URL for this processor. + */ +#define XSLT_DEFAULT_URL "http://xmlsoft.org/XSLT/" + +/** + * XSLT_NAMESPACE: + * + * The XSLT specification namespace. + */ +#define XSLT_NAMESPACE ((const xmlChar *)"http://www.w3.org/1999/XSL/Transform") + +/** + * XSLT_PARSE_OPTIONS: + * + * The set of options to pass to an xmlReadxxx when loading files for + * XSLT consumption. + */ +#define XSLT_PARSE_OPTIONS \ + XML_PARSE_NOENT | XML_PARSE_DTDLOAD | XML_PARSE_DTDATTR | XML_PARSE_NOCDATA + +/** + * xsltMaxDepth: + * + * This value is used to detect templates loops. + */ +XSLTPUBVAR int xsltMaxDepth; + +/** + * * xsltMaxVars: + * * + * * This value is used to detect templates loops. + * */ +XSLTPUBVAR int xsltMaxVars; + +/** + * xsltEngineVersion: + * + * The version string for libxslt. + */ +XSLTPUBVAR const char *xsltEngineVersion; + +/** + * xsltLibxsltVersion: + * + * The version of libxslt compiled. + */ +XSLTPUBVAR const int xsltLibxsltVersion; + +/** + * xsltLibxmlVersion: + * + * The version of libxml libxslt was compiled against. + */ +XSLTPUBVAR const int xsltLibxmlVersion; + +/* + * Global initialization function. + */ + +XSLTPUBFUN void XSLTCALL + xsltInit (void); + +/* + * Global cleanup function. + */ +XSLTPUBFUN void XSLTCALL + xsltCleanupGlobals (void); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltInternals.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltInternals.h new file mode 100644 index 00000000..6faa07db --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltInternals.h @@ -0,0 +1,1995 @@ +/* + * Summary: internal data structures, constants and functions + * Description: Internal data structures, constants and functions used + * by the XSLT engine. + * They are not part of the API or ABI, i.e. they can change + * without prior notice, use carefully. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLT_INTERNALS_H__ +#define __XML_XSLT_INTERNALS_H__ + +#include <libxml/tree.h> +#include <libxml/hash.h> +#include <libxml/xpath.h> +#include <libxml/xmlerror.h> +#include <libxml/dict.h> +#include <libxml/xmlstring.h> +#include <libxslt/xslt.h> +#include "xsltexports.h" +#include "numbersInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* #define XSLT_DEBUG_PROFILE_CACHE */ + +/** + * XSLT_IS_TEXT_NODE: + * + * check if the argument is a text node + */ +#define XSLT_IS_TEXT_NODE(n) ((n != NULL) && \ + (((n)->type == XML_TEXT_NODE) || \ + ((n)->type == XML_CDATA_SECTION_NODE))) + + +/** + * XSLT_MARK_RES_TREE_FRAG: + * + * internal macro to set up tree fragments + */ +#define XSLT_MARK_RES_TREE_FRAG(n) \ + (n)->name = (char *) xmlStrdup(BAD_CAST " fake node libxslt"); + +/** + * XSLT_IS_RES_TREE_FRAG: + * + * internal macro to test tree fragments + */ +#define XSLT_IS_RES_TREE_FRAG(n) \ + ((n != NULL) && ((n)->type == XML_DOCUMENT_NODE) && \ + ((n)->name != NULL) && ((n)->name[0] == ' ')) + +/** + * XSLT_REFACTORED_KEYCOMP: + * + * Internal define to enable on-demand xsl:key computation. + * That's the only mode now but the define is kept for compatibility + */ +#define XSLT_REFACTORED_KEYCOMP + +/** + * XSLT_FAST_IF: + * + * Internal define to enable usage of xmlXPathCompiledEvalToBoolean() + * for XSLT "tests"; e.g. in <xsl:if test="/foo/bar"> + */ +#define XSLT_FAST_IF + +/** + * XSLT_REFACTORED: + * + * Internal define to enable the refactored parts of Libxslt. + */ +/* #define XSLT_REFACTORED */ +/* ==================================================================== */ + +/** + * XSLT_REFACTORED_VARS: + * + * Internal define to enable the refactored variable part of libxslt + */ +#define XSLT_REFACTORED_VARS + +#ifdef XSLT_REFACTORED + +extern const xmlChar *xsltXSLTAttrMarker; + + +/* TODO: REMOVE: #define XSLT_REFACTORED_EXCLRESNS */ + +/* TODO: REMOVE: #define XSLT_REFACTORED_NSALIAS */ + +/** + * XSLT_REFACTORED_XSLT_NSCOMP + * + * Internal define to enable the pointer-comparison of + * namespaces of XSLT elements. + */ +/* #define XSLT_REFACTORED_XSLT_NSCOMP */ + +#ifdef XSLT_REFACTORED_XSLT_NSCOMP + +extern const xmlChar *xsltConstNamespaceNameXSLT; + +/** + * IS_XSLT_ELEM_FAST: + * + * quick test to detect XSLT elements + */ +#define IS_XSLT_ELEM_FAST(n) \ + (((n) != NULL) && ((n)->ns != NULL) && \ + ((n)->ns->href == xsltConstNamespaceNameXSLT)) + +/** + * IS_XSLT_ATTR_FAST: + * + * quick test to detect XSLT attributes + */ +#define IS_XSLT_ATTR_FAST(a) \ + (((a) != NULL) && ((a)->ns != NULL) && \ + ((a)->ns->href == xsltConstNamespaceNameXSLT)) + +/** + * XSLT_HAS_INTERNAL_NSMAP: + * + * check for namespace mapping + */ +#define XSLT_HAS_INTERNAL_NSMAP(s) \ + (((s) != NULL) && ((s)->principal) && \ + ((s)->principal->principalData) && \ + ((s)->principal->principalData->nsMap)) + +/** + * XSLT_GET_INTERNAL_NSMAP: + * + * get pointer to namespace map + */ +#define XSLT_GET_INTERNAL_NSMAP(s) ((s)->principal->principalData->nsMap) + +#else /* XSLT_REFACTORED_XSLT_NSCOMP */ + +/** + * IS_XSLT_ELEM_FAST: + * + * quick check whether this is an xslt element + */ +#define IS_XSLT_ELEM_FAST(n) \ + (((n) != NULL) && ((n)->ns != NULL) && \ + (xmlStrEqual((n)->ns->href, XSLT_NAMESPACE))) + +/** + * IS_XSLT_ATTR_FAST: + * + * quick check for xslt namespace attribute + */ +#define IS_XSLT_ATTR_FAST(a) \ + (((a) != NULL) && ((a)->ns != NULL) && \ + (xmlStrEqual((a)->ns->href, XSLT_NAMESPACE))) + + +#endif /* XSLT_REFACTORED_XSLT_NSCOMP */ + + +/** + * XSLT_REFACTORED_MANDATORY_VERSION: + * + * TODO: Currently disabled to surpress regression test failures, since + * the old behaviour was that a missing version attribute + * produced a only a warning and not an error, which was incerrect. + * So the regression tests need to be fixed if this is enabled. + */ +/* #define XSLT_REFACTORED_MANDATORY_VERSION */ + +/** + * xsltPointerList: + * + * Pointer-list for various purposes. + */ +typedef struct _xsltPointerList xsltPointerList; +typedef xsltPointerList *xsltPointerListPtr; +struct _xsltPointerList { + void **items; + int number; + int size; +}; + +#endif + +/** + * XSLT_REFACTORED_PARSING: + * + * Internal define to enable the refactored parts of Libxslt + * related to parsing. + */ +/* #define XSLT_REFACTORED_PARSING */ + +/** + * XSLT_MAX_SORT: + * + * Max number of specified xsl:sort on an element. + */ +#define XSLT_MAX_SORT 15 + +/** + * XSLT_PAT_NO_PRIORITY: + * + * Specific value for pattern without priority expressed. + */ +#define XSLT_PAT_NO_PRIORITY -12345789 + +/** + * xsltRuntimeExtra: + * + * Extra information added to the transformation context. + */ +typedef struct _xsltRuntimeExtra xsltRuntimeExtra; +typedef xsltRuntimeExtra *xsltRuntimeExtraPtr; +struct _xsltRuntimeExtra { + void *info; /* pointer to the extra data */ + xmlFreeFunc deallocate; /* pointer to the deallocation routine */ + union { /* dual-purpose field */ + void *ptr; /* data not needing deallocation */ + int ival; /* integer value storage */ + } val; +}; + +/** + * XSLT_RUNTIME_EXTRA_LST: + * @ctxt: the transformation context + * @nr: the index + * + * Macro used to access extra information stored in the context + */ +#define XSLT_RUNTIME_EXTRA_LST(ctxt, nr) (ctxt)->extras[(nr)].info +/** + * XSLT_RUNTIME_EXTRA_FREE: + * @ctxt: the transformation context + * @nr: the index + * + * Macro used to free extra information stored in the context + */ +#define XSLT_RUNTIME_EXTRA_FREE(ctxt, nr) (ctxt)->extras[(nr)].deallocate +/** + * XSLT_RUNTIME_EXTRA: + * @ctxt: the transformation context + * @nr: the index + * + * Macro used to define extra information stored in the context + */ +#define XSLT_RUNTIME_EXTRA(ctxt, nr, typ) (ctxt)->extras[(nr)].val.typ + +/** + * xsltTemplate: + * + * The in-memory structure corresponding to an XSLT Template. + */ +typedef struct _xsltTemplate xsltTemplate; +typedef xsltTemplate *xsltTemplatePtr; +struct _xsltTemplate { + struct _xsltTemplate *next;/* chained list sorted by priority */ + struct _xsltStylesheet *style;/* the containing stylesheet */ + xmlChar *match; /* the matching string */ + float priority; /* as given from the stylesheet, not computed */ + const xmlChar *name; /* the local part of the name QName */ + const xmlChar *nameURI; /* the URI part of the name QName */ + const xmlChar *mode;/* the local part of the mode QName */ + const xmlChar *modeURI;/* the URI part of the mode QName */ + xmlNodePtr content; /* the template replacement value */ + xmlNodePtr elem; /* the source element */ + + /* + * TODO: @inheritedNsNr and @inheritedNs won't be used in the + * refactored code. + */ + int inheritedNsNr; /* number of inherited namespaces */ + xmlNsPtr *inheritedNs;/* inherited non-excluded namespaces */ + + /* Profiling information */ + int nbCalls; /* the number of time the template was called */ + unsigned long time; /* the time spent in this template */ + void *params; /* xsl:param instructions */ + + int templNr; /* Nb of templates in the stack */ + int templMax; /* Size of the templtes stack */ + xsltTemplatePtr *templCalledTab; /* templates called */ + int *templCountTab; /* .. and how often */ + + /* Conflict resolution */ + int position; +}; + +/** + * xsltDecimalFormat: + * + * Data structure of decimal-format. + */ +typedef struct _xsltDecimalFormat xsltDecimalFormat; +typedef xsltDecimalFormat *xsltDecimalFormatPtr; +struct _xsltDecimalFormat { + struct _xsltDecimalFormat *next; /* chained list */ + xmlChar *name; + /* Used for interpretation of pattern */ + xmlChar *digit; + xmlChar *patternSeparator; + /* May appear in result */ + xmlChar *minusSign; + xmlChar *infinity; + xmlChar *noNumber; /* Not-a-number */ + /* Used for interpretation of pattern and may appear in result */ + xmlChar *decimalPoint; + xmlChar *grouping; + xmlChar *percent; + xmlChar *permille; + xmlChar *zeroDigit; + const xmlChar *nsUri; +}; + +/** + * xsltDocument: + * + * Data structure associated to a parsed document. + */ +typedef struct _xsltDocument xsltDocument; +typedef xsltDocument *xsltDocumentPtr; +struct _xsltDocument { + struct _xsltDocument *next; /* documents are kept in a chained list */ + int main; /* is this the main document */ + xmlDocPtr doc; /* the parsed document */ + void *keys; /* key tables storage */ + struct _xsltDocument *includes; /* subsidiary includes */ + int preproc; /* pre-processing already done */ + int nbKeysComputed; +}; + +/** + * xsltKeyDef: + * + * Representation of an xsl:key. + */ +typedef struct _xsltKeyDef xsltKeyDef; +typedef xsltKeyDef *xsltKeyDefPtr; +struct _xsltKeyDef { + struct _xsltKeyDef *next; + xmlNodePtr inst; + xmlChar *name; + xmlChar *nameURI; + xmlChar *match; + xmlChar *use; + xmlXPathCompExprPtr comp; + xmlXPathCompExprPtr usecomp; + xmlNsPtr *nsList; /* the namespaces in scope */ + int nsNr; /* the number of namespaces in scope */ +}; + +/** + * xsltKeyTable: + * + * Holds the computed keys for key definitions of the same QName. + * Is owned by an xsltDocument. + */ +typedef struct _xsltKeyTable xsltKeyTable; +typedef xsltKeyTable *xsltKeyTablePtr; +struct _xsltKeyTable { + struct _xsltKeyTable *next; + xmlChar *name; + xmlChar *nameURI; + xmlHashTablePtr keys; +}; + +/* + * The in-memory structure corresponding to an XSLT Stylesheet. + * NOTE: most of the content is simply linked from the doc tree + * structure, no specific allocation is made. + */ +typedef struct _xsltStylesheet xsltStylesheet; +typedef xsltStylesheet *xsltStylesheetPtr; + +typedef struct _xsltTransformContext xsltTransformContext; +typedef xsltTransformContext *xsltTransformContextPtr; + +/** + * xsltElemPreComp: + * + * The in-memory structure corresponding to element precomputed data, + * designed to be extended by extension implementors. + */ +typedef struct _xsltElemPreComp xsltElemPreComp; +typedef xsltElemPreComp *xsltElemPreCompPtr; + +/** + * xsltTransformFunction: + * @ctxt: the XSLT transformation context + * @node: the input node + * @inst: the stylesheet node + * @comp: the compiled information from the stylesheet + * + * Signature of the function associated to elements part of the + * stylesheet language like xsl:if or xsl:apply-templates. + */ +typedef void (*xsltTransformFunction) (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst, + xsltElemPreCompPtr comp); + +/** + * xsltSortFunc: + * @ctxt: a transformation context + * @sorts: the node-set to sort + * @nbsorts: the number of sorts + * + * Signature of the function to use during sorting + */ +typedef void (*xsltSortFunc) (xsltTransformContextPtr ctxt, xmlNodePtr *sorts, + int nbsorts); + +typedef enum { + XSLT_FUNC_COPY=1, + XSLT_FUNC_SORT, + XSLT_FUNC_TEXT, + XSLT_FUNC_ELEMENT, + XSLT_FUNC_ATTRIBUTE, + XSLT_FUNC_COMMENT, + XSLT_FUNC_PI, + XSLT_FUNC_COPYOF, + XSLT_FUNC_VALUEOF, + XSLT_FUNC_NUMBER, + XSLT_FUNC_APPLYIMPORTS, + XSLT_FUNC_CALLTEMPLATE, + XSLT_FUNC_APPLYTEMPLATES, + XSLT_FUNC_CHOOSE, + XSLT_FUNC_IF, + XSLT_FUNC_FOREACH, + XSLT_FUNC_DOCUMENT, + XSLT_FUNC_WITHPARAM, + XSLT_FUNC_PARAM, + XSLT_FUNC_VARIABLE, + XSLT_FUNC_WHEN, + XSLT_FUNC_EXTENSION +#ifdef XSLT_REFACTORED + , + XSLT_FUNC_OTHERWISE, + XSLT_FUNC_FALLBACK, + XSLT_FUNC_MESSAGE, + XSLT_FUNC_INCLUDE, + XSLT_FUNC_ATTRSET, + XSLT_FUNC_LITERAL_RESULT_ELEMENT, + XSLT_FUNC_UNKOWN_FORWARDS_COMPAT +#endif +} xsltStyleType; + +/** + * xsltElemPreCompDeallocator: + * @comp: the #xsltElemPreComp to free up + * + * Deallocates an #xsltElemPreComp structure. + */ +typedef void (*xsltElemPreCompDeallocator) (xsltElemPreCompPtr comp); + +/** + * xsltElemPreComp: + * + * The basic structure for compiled items of the AST of the XSLT processor. + * This structure is also intended to be extended by extension implementors. + * TODO: This is somehow not nice, since it has a "free" field, which + * derived stylesheet-structs do not have. + */ +struct _xsltElemPreComp { + xsltElemPreCompPtr next; /* next item in the global chained + list held by xsltStylesheet. */ + xsltStyleType type; /* type of the element */ + xsltTransformFunction func; /* handling function */ + xmlNodePtr inst; /* the node in the stylesheet's tree + corresponding to this item */ + + /* end of common part */ + xsltElemPreCompDeallocator free; /* the deallocator */ +}; + +/** + * xsltStylePreComp: + * + * The abstract basic structure for items of the XSLT processor. + * This includes: + * 1) compiled forms of XSLT instructions (xsl:if, xsl:attribute, etc.) + * 2) compiled forms of literal result elements + * 3) compiled forms of extension elements + */ +typedef struct _xsltStylePreComp xsltStylePreComp; +typedef xsltStylePreComp *xsltStylePreCompPtr; + +#ifdef XSLT_REFACTORED + +/* +* Some pointer-list utility functions. +*/ +XSLTPUBFUN xsltPointerListPtr XSLTCALL + xsltPointerListCreate (int initialSize); +XSLTPUBFUN void XSLTCALL + xsltPointerListFree (xsltPointerListPtr list); +XSLTPUBFUN void XSLTCALL + xsltPointerListClear (xsltPointerListPtr list); +XSLTPUBFUN int XSLTCALL + xsltPointerListAddSize (xsltPointerListPtr list, + void *item, + int initialSize); + +/************************************************************************ + * * + * Refactored structures * + * * + ************************************************************************/ + +typedef struct _xsltNsListContainer xsltNsListContainer; +typedef xsltNsListContainer *xsltNsListContainerPtr; +struct _xsltNsListContainer { + xmlNsPtr *list; + int totalNumber; + int xpathNumber; +}; + +/** + * XSLT_ITEM_COMPATIBILITY_FIELDS: + * + * Fields for API compatibility to the structure + * _xsltElemPreComp which is used for extension functions. + * Note that @next is used for storage; it does not reflect a next + * sibling in the tree. + * TODO: Evaluate if we really need such a compatibility. + */ +#define XSLT_ITEM_COMPATIBILITY_FIELDS \ + xsltElemPreCompPtr next;\ + xsltStyleType type;\ + xsltTransformFunction func;\ + xmlNodePtr inst; + +/** + * XSLT_ITEM_NAVIGATION_FIELDS: + * + * Currently empty. + * TODO: It is intended to hold navigational fields in the future. + */ +#define XSLT_ITEM_NAVIGATION_FIELDS +/* + xsltStylePreCompPtr parent;\ + xsltStylePreCompPtr children;\ + xsltStylePreCompPtr nextItem; +*/ + +/** + * XSLT_ITEM_NSINSCOPE_FIELDS: + * + * The in-scope namespaces. + */ +#define XSLT_ITEM_NSINSCOPE_FIELDS xsltNsListContainerPtr inScopeNs; + +/** + * XSLT_ITEM_COMMON_FIELDS: + * + * Common fields used for all items. + */ +#define XSLT_ITEM_COMMON_FIELDS \ + XSLT_ITEM_COMPATIBILITY_FIELDS \ + XSLT_ITEM_NAVIGATION_FIELDS \ + XSLT_ITEM_NSINSCOPE_FIELDS + +/** + * _xsltStylePreComp: + * + * The abstract basic structure for items of the XSLT processor. + * This includes: + * 1) compiled forms of XSLT instructions (e.g. xsl:if, xsl:attribute, etc.) + * 2) compiled forms of literal result elements + * 3) various properties for XSLT instructions (e.g. xsl:when, + * xsl:with-param) + * + * REVISIT TODO: Keep this structure equal to the fields + * defined by XSLT_ITEM_COMMON_FIELDS + */ +struct _xsltStylePreComp { + xsltElemPreCompPtr next; /* next item in the global chained + list held by xsltStylesheet */ + xsltStyleType type; /* type of the item */ + xsltTransformFunction func; /* handling function */ + xmlNodePtr inst; /* the node in the stylesheet's tree + corresponding to this item. */ + /* Currently no navigational fields. */ + xsltNsListContainerPtr inScopeNs; +}; + +/** + * xsltStyleBasicEmptyItem: + * + * Abstract structure only used as a short-cut for + * XSLT items with no extra fields. + * NOTE that it is intended that this structure looks the same as + * _xsltStylePreComp. + */ +typedef struct _xsltStyleBasicEmptyItem xsltStyleBasicEmptyItem; +typedef xsltStyleBasicEmptyItem *xsltStyleBasicEmptyItemPtr; + +struct _xsltStyleBasicEmptyItem { + XSLT_ITEM_COMMON_FIELDS +}; + +/** + * xsltStyleBasicExpressionItem: + * + * Abstract structure only used as a short-cut for + * XSLT items with just an expression. + */ +typedef struct _xsltStyleBasicExpressionItem xsltStyleBasicExpressionItem; +typedef xsltStyleBasicExpressionItem *xsltStyleBasicExpressionItemPtr; + +struct _xsltStyleBasicExpressionItem { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *select; /* TODO: Change this to "expression". */ + xmlXPathCompExprPtr comp; /* TODO: Change this to compExpr. */ +}; + +/************************************************************************ + * * + * XSLT-instructions/declarations * + * * + ************************************************************************/ + +/** + * xsltStyleItemElement: + * + * <!-- Category: instruction --> + * <xsl:element + * name = { qname } + * namespace = { uri-reference } + * use-attribute-sets = qnames> + * <!-- Content: template --> + * </xsl:element> + */ +typedef struct _xsltStyleItemElement xsltStyleItemElement; +typedef xsltStyleItemElement *xsltStyleItemElementPtr; + +struct _xsltStyleItemElement { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *use; + int has_use; + const xmlChar *name; + int has_name; + const xmlChar *ns; + const xmlChar *nsPrefix; + int has_ns; +}; + +/** + * xsltStyleItemAttribute: + * + * <!-- Category: instruction --> + * <xsl:attribute + * name = { qname } + * namespace = { uri-reference }> + * <!-- Content: template --> + * </xsl:attribute> + */ +typedef struct _xsltStyleItemAttribute xsltStyleItemAttribute; +typedef xsltStyleItemAttribute *xsltStyleItemAttributePtr; + +struct _xsltStyleItemAttribute { + XSLT_ITEM_COMMON_FIELDS + const xmlChar *name; + int has_name; + const xmlChar *ns; + const xmlChar *nsPrefix; + int has_ns; +}; + +/** + * xsltStyleItemText: + * + * <!-- Category: instruction --> + * <xsl:text + * disable-output-escaping = "yes" | "no"> + * <!-- Content: #PCDATA --> + * </xsl:text> + */ +typedef struct _xsltStyleItemText xsltStyleItemText; +typedef xsltStyleItemText *xsltStyleItemTextPtr; + +struct _xsltStyleItemText { + XSLT_ITEM_COMMON_FIELDS + int noescape; /* text */ +}; + +/** + * xsltStyleItemComment: + * + * <!-- Category: instruction --> + * <xsl:comment> + * <!-- Content: template --> + * </xsl:comment> + */ +typedef xsltStyleBasicEmptyItem xsltStyleItemComment; +typedef xsltStyleItemComment *xsltStyleItemCommentPtr; + +/** + * xsltStyleItemPI: + * + * <!-- Category: instruction --> + * <xsl:processing-instruction + * name = { ncname }> + * <!-- Content: template --> + * </xsl:processing-instruction> + */ +typedef struct _xsltStyleItemPI xsltStyleItemPI; +typedef xsltStyleItemPI *xsltStyleItemPIPtr; + +struct _xsltStyleItemPI { + XSLT_ITEM_COMMON_FIELDS + const xmlChar *name; + int has_name; +}; + +/** + * xsltStyleItemApplyImports: + * + * <!-- Category: instruction --> + * <xsl:apply-imports /> + */ +typedef xsltStyleBasicEmptyItem xsltStyleItemApplyImports; +typedef xsltStyleItemApplyImports *xsltStyleItemApplyImportsPtr; + +/** + * xsltStyleItemApplyTemplates: + * + * <!-- Category: instruction --> + * <xsl:apply-templates + * select = node-set-expression + * mode = qname> + * <!-- Content: (xsl:sort | xsl:with-param)* --> + * </xsl:apply-templates> + */ +typedef struct _xsltStyleItemApplyTemplates xsltStyleItemApplyTemplates; +typedef xsltStyleItemApplyTemplates *xsltStyleItemApplyTemplatesPtr; + +struct _xsltStyleItemApplyTemplates { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *mode; /* apply-templates */ + const xmlChar *modeURI; /* apply-templates */ + const xmlChar *select; /* sort, copy-of, value-of, apply-templates */ + xmlXPathCompExprPtr comp; /* a precompiled XPath expression */ + /* TODO: with-params */ +}; + +/** + * xsltStyleItemCallTemplate: + * + * <!-- Category: instruction --> + * <xsl:call-template + * name = qname> + * <!-- Content: xsl:with-param* --> + * </xsl:call-template> + */ +typedef struct _xsltStyleItemCallTemplate xsltStyleItemCallTemplate; +typedef xsltStyleItemCallTemplate *xsltStyleItemCallTemplatePtr; + +struct _xsltStyleItemCallTemplate { + XSLT_ITEM_COMMON_FIELDS + + xsltTemplatePtr templ; /* call-template */ + const xmlChar *name; /* element, attribute, pi */ + int has_name; /* element, attribute, pi */ + const xmlChar *ns; /* element */ + int has_ns; /* element */ + /* TODO: with-params */ +}; + +/** + * xsltStyleItemCopy: + * + * <!-- Category: instruction --> + * <xsl:copy + * use-attribute-sets = qnames> + * <!-- Content: template --> + * </xsl:copy> + */ +typedef struct _xsltStyleItemCopy xsltStyleItemCopy; +typedef xsltStyleItemCopy *xsltStyleItemCopyPtr; + +struct _xsltStyleItemCopy { + XSLT_ITEM_COMMON_FIELDS + const xmlChar *use; /* copy, element */ + int has_use; /* copy, element */ +}; + +/** + * xsltStyleItemIf: + * + * <!-- Category: instruction --> + * <xsl:if + * test = boolean-expression> + * <!-- Content: template --> + * </xsl:if> + */ +typedef struct _xsltStyleItemIf xsltStyleItemIf; +typedef xsltStyleItemIf *xsltStyleItemIfPtr; + +struct _xsltStyleItemIf { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *test; /* if */ + xmlXPathCompExprPtr comp; /* a precompiled XPath expression */ +}; + + +/** + * xsltStyleItemCopyOf: + * + * <!-- Category: instruction --> + * <xsl:copy-of + * select = expression /> + */ +typedef xsltStyleBasicExpressionItem xsltStyleItemCopyOf; +typedef xsltStyleItemCopyOf *xsltStyleItemCopyOfPtr; + +/** + * xsltStyleItemValueOf: + * + * <!-- Category: instruction --> + * <xsl:value-of + * select = string-expression + * disable-output-escaping = "yes" | "no" /> + */ +typedef struct _xsltStyleItemValueOf xsltStyleItemValueOf; +typedef xsltStyleItemValueOf *xsltStyleItemValueOfPtr; + +struct _xsltStyleItemValueOf { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *select; + xmlXPathCompExprPtr comp; /* a precompiled XPath expression */ + int noescape; +}; + +/** + * xsltStyleItemNumber: + * + * <!-- Category: instruction --> + * <xsl:number + * level = "single" | "multiple" | "any" + * count = pattern + * from = pattern + * value = number-expression + * format = { string } + * lang = { nmtoken } + * letter-value = { "alphabetic" | "traditional" } + * grouping-separator = { char } + * grouping-size = { number } /> + */ +typedef struct _xsltStyleItemNumber xsltStyleItemNumber; +typedef xsltStyleItemNumber *xsltStyleItemNumberPtr; + +struct _xsltStyleItemNumber { + XSLT_ITEM_COMMON_FIELDS + xsltNumberData numdata; /* number */ +}; + +/** + * xsltStyleItemChoose: + * + * <!-- Category: instruction --> + * <xsl:choose> + * <!-- Content: (xsl:when+, xsl:otherwise?) --> + * </xsl:choose> + */ +typedef xsltStyleBasicEmptyItem xsltStyleItemChoose; +typedef xsltStyleItemChoose *xsltStyleItemChoosePtr; + +/** + * xsltStyleItemFallback: + * + * <!-- Category: instruction --> + * <xsl:fallback> + * <!-- Content: template --> + * </xsl:fallback> + */ +typedef xsltStyleBasicEmptyItem xsltStyleItemFallback; +typedef xsltStyleItemFallback *xsltStyleItemFallbackPtr; + +/** + * xsltStyleItemForEach: + * + * <!-- Category: instruction --> + * <xsl:for-each + * select = node-set-expression> + * <!-- Content: (xsl:sort*, template) --> + * </xsl:for-each> + */ +typedef xsltStyleBasicExpressionItem xsltStyleItemForEach; +typedef xsltStyleItemForEach *xsltStyleItemForEachPtr; + +/** + * xsltStyleItemMessage: + * + * <!-- Category: instruction --> + * <xsl:message + * terminate = "yes" | "no"> + * <!-- Content: template --> + * </xsl:message> + */ +typedef struct _xsltStyleItemMessage xsltStyleItemMessage; +typedef xsltStyleItemMessage *xsltStyleItemMessagePtr; + +struct _xsltStyleItemMessage { + XSLT_ITEM_COMMON_FIELDS + int terminate; +}; + +/** + * xsltStyleItemDocument: + * + * NOTE: This is not an instruction of XSLT 1.0. + */ +typedef struct _xsltStyleItemDocument xsltStyleItemDocument; +typedef xsltStyleItemDocument *xsltStyleItemDocumentPtr; + +struct _xsltStyleItemDocument { + XSLT_ITEM_COMMON_FIELDS + int ver11; /* assigned: in xsltDocumentComp; + read: nowhere; + TODO: Check if we need. */ + const xmlChar *filename; /* document URL */ + int has_filename; +}; + +/************************************************************************ + * * + * Non-instructions (actually properties of instructions/declarations) * + * * + ************************************************************************/ + +/** + * xsltStyleBasicItemVariable: + * + * Basic struct for xsl:variable, xsl:param and xsl:with-param. + * It's currently important to have equal fields, since + * xsltParseStylesheetCallerParam() is used with xsl:with-param from + * the xslt side and with xsl:param from the exslt side (in + * exsltFuncFunctionFunction()). + * + * FUTURE NOTE: In XSLT 2.0 xsl:param, xsl:variable and xsl:with-param + * have additional different fields. + */ +typedef struct _xsltStyleBasicItemVariable xsltStyleBasicItemVariable; +typedef xsltStyleBasicItemVariable *xsltStyleBasicItemVariablePtr; + +struct _xsltStyleBasicItemVariable { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *select; + xmlXPathCompExprPtr comp; + + const xmlChar *name; + int has_name; + const xmlChar *ns; + int has_ns; +}; + +/** + * xsltStyleItemVariable: + * + * <!-- Category: top-level-element --> + * <xsl:param + * name = qname + * select = expression> + * <!-- Content: template --> + * </xsl:param> + */ +typedef xsltStyleBasicItemVariable xsltStyleItemVariable; +typedef xsltStyleItemVariable *xsltStyleItemVariablePtr; + +/** + * xsltStyleItemParam: + * + * <!-- Category: top-level-element --> + * <xsl:param + * name = qname + * select = expression> + * <!-- Content: template --> + * </xsl:param> + */ +typedef struct _xsltStyleItemParam xsltStyleItemParam; +typedef xsltStyleItemParam *xsltStyleItemParamPtr; + +struct _xsltStyleItemParam { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *select; + xmlXPathCompExprPtr comp; + + const xmlChar *name; + int has_name; + const xmlChar *ns; + int has_ns; +}; + +/** + * xsltStyleItemWithParam: + * + * <xsl:with-param + * name = qname + * select = expression> + * <!-- Content: template --> + * </xsl:with-param> + */ +typedef xsltStyleBasicItemVariable xsltStyleItemWithParam; +typedef xsltStyleItemWithParam *xsltStyleItemWithParamPtr; + +/** + * xsltStyleItemSort: + * + * Reflects the XSLT xsl:sort item. + * Allowed parents: xsl:apply-templates, xsl:for-each + * <xsl:sort + * select = string-expression + * lang = { nmtoken } + * data-type = { "text" | "number" | qname-but-not-ncname } + * order = { "ascending" | "descending" } + * case-order = { "upper-first" | "lower-first" } /> + */ +typedef struct _xsltStyleItemSort xsltStyleItemSort; +typedef xsltStyleItemSort *xsltStyleItemSortPtr; + +struct _xsltStyleItemSort { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *stype; /* sort */ + int has_stype; /* sort */ + int number; /* sort */ + const xmlChar *order; /* sort */ + int has_order; /* sort */ + int descending; /* sort */ + const xmlChar *lang; /* sort */ + int has_lang; /* sort */ + const xmlChar *case_order; /* sort */ + int lower_first; /* sort */ + + const xmlChar *use; + int has_use; + + const xmlChar *select; /* sort, copy-of, value-of, apply-templates */ + + xmlXPathCompExprPtr comp; /* a precompiled XPath expression */ +}; + + +/** + * xsltStyleItemWhen: + * + * <xsl:when + * test = boolean-expression> + * <!-- Content: template --> + * </xsl:when> + * Allowed parent: xsl:choose + */ +typedef struct _xsltStyleItemWhen xsltStyleItemWhen; +typedef xsltStyleItemWhen *xsltStyleItemWhenPtr; + +struct _xsltStyleItemWhen { + XSLT_ITEM_COMMON_FIELDS + + const xmlChar *test; + xmlXPathCompExprPtr comp; +}; + +/** + * xsltStyleItemOtherwise: + * + * Allowed parent: xsl:choose + * <xsl:otherwise> + * <!-- Content: template --> + * </xsl:otherwise> + */ +typedef struct _xsltStyleItemOtherwise xsltStyleItemOtherwise; +typedef xsltStyleItemOtherwise *xsltStyleItemOtherwisePtr; + +struct _xsltStyleItemOtherwise { + XSLT_ITEM_COMMON_FIELDS +}; + +typedef struct _xsltStyleItemInclude xsltStyleItemInclude; +typedef xsltStyleItemInclude *xsltStyleItemIncludePtr; + +struct _xsltStyleItemInclude { + XSLT_ITEM_COMMON_FIELDS + xsltDocumentPtr include; +}; + +/************************************************************************ + * * + * XSLT elements in forwards-compatible mode * + * * + ************************************************************************/ + +typedef struct _xsltStyleItemUknown xsltStyleItemUknown; +typedef xsltStyleItemUknown *xsltStyleItemUknownPtr; +struct _xsltStyleItemUknown { + XSLT_ITEM_COMMON_FIELDS +}; + + +/************************************************************************ + * * + * Extension elements * + * * + ************************************************************************/ + +/* + * xsltStyleItemExtElement: + * + * Reflects extension elements. + * + * NOTE: Due to the fact that the structure xsltElemPreComp is most + * probably already heavily in use out there by users, so we cannot + * easily change it, we'll create an intermediate structure which will + * hold an xsltElemPreCompPtr. + * BIG NOTE: The only problem I see here is that the user processes the + * content of the stylesheet tree, possibly he'll lookup the node->psvi + * fields in order to find subsequent extension functions. + * In this case, the user's code will break, since the node->psvi + * field will hold now the xsltStyleItemExtElementPtr and not + * the xsltElemPreCompPtr. + * However the place where the structure is anchored in the node-tree, + * namely node->psvi, has beed already once been moved from node->_private + * to node->psvi, so we have a precedent here, which, I think, should allow + * us to change such semantics without headaches. + */ +typedef struct _xsltStyleItemExtElement xsltStyleItemExtElement; +typedef xsltStyleItemExtElement *xsltStyleItemExtElementPtr; +struct _xsltStyleItemExtElement { + XSLT_ITEM_COMMON_FIELDS + xsltElemPreCompPtr item; +}; + +/************************************************************************ + * * + * Literal result elements * + * * + ************************************************************************/ + +typedef struct _xsltEffectiveNs xsltEffectiveNs; +typedef xsltEffectiveNs *xsltEffectiveNsPtr; +struct _xsltEffectiveNs { + xsltEffectiveNsPtr nextInStore; /* storage next */ + xsltEffectiveNsPtr next; /* next item in the list */ + const xmlChar *prefix; + const xmlChar *nsName; + /* + * Indicates if eclared on the literal result element; dunno if really + * needed. + */ + int holdByElem; +}; + +/* + * Info for literal result elements. + * This will be set on the elem->psvi field and will be + * shared by literal result elements, which have the same + * excluded result namespaces; i.e., this *won't* be created uniquely + * for every literal result element. + */ +typedef struct _xsltStyleItemLRElementInfo xsltStyleItemLRElementInfo; +typedef xsltStyleItemLRElementInfo *xsltStyleItemLRElementInfoPtr; +struct _xsltStyleItemLRElementInfo { + XSLT_ITEM_COMMON_FIELDS + /* + * @effectiveNs is the set of effective ns-nodes + * on the literal result element, which will be added to the result + * element if not already existing in the result tree. + * This means that excluded namespaces (via exclude-result-prefixes, + * extension-element-prefixes and the XSLT namespace) not added + * to the set. + * Namespace-aliasing was applied on the @effectiveNs. + */ + xsltEffectiveNsPtr effectiveNs; + +}; + +#ifdef XSLT_REFACTORED + +typedef struct _xsltNsAlias xsltNsAlias; +typedef xsltNsAlias *xsltNsAliasPtr; +struct _xsltNsAlias { + xsltNsAliasPtr next; /* next in the list */ + xmlNsPtr literalNs; + xmlNsPtr targetNs; + xmlDocPtr docOfTargetNs; +}; +#endif + +#ifdef XSLT_REFACTORED_XSLT_NSCOMP + +typedef struct _xsltNsMap xsltNsMap; +typedef xsltNsMap *xsltNsMapPtr; +struct _xsltNsMap { + xsltNsMapPtr next; /* next in the list */ + xmlDocPtr doc; + xmlNodePtr elem; /* the element holding the ns-decl */ + xmlNsPtr ns; /* the xmlNs structure holding the XML namespace name */ + const xmlChar *origNsName; /* the original XML namespace name */ + const xmlChar *newNsName; /* the mapped XML namespace name */ +}; +#endif + +/************************************************************************ + * * + * Compile-time structures for *internal* use only * + * * + ************************************************************************/ + +typedef struct _xsltPrincipalStylesheetData xsltPrincipalStylesheetData; +typedef xsltPrincipalStylesheetData *xsltPrincipalStylesheetDataPtr; + +typedef struct _xsltNsList xsltNsList; +typedef xsltNsList *xsltNsListPtr; +struct _xsltNsList { + xsltNsListPtr next; /* next in the list */ + xmlNsPtr ns; +}; + +/* +* xsltVarInfo: +* +* Used at compilation time for parameters and variables. +*/ +typedef struct _xsltVarInfo xsltVarInfo; +typedef xsltVarInfo *xsltVarInfoPtr; +struct _xsltVarInfo { + xsltVarInfoPtr next; /* next in the list */ + xsltVarInfoPtr prev; + int depth; /* the depth in the tree */ + const xmlChar *name; + const xmlChar *nsName; +}; + +/** + * xsltCompilerNodeInfo: + * + * Per-node information during compile-time. + */ +typedef struct _xsltCompilerNodeInfo xsltCompilerNodeInfo; +typedef xsltCompilerNodeInfo *xsltCompilerNodeInfoPtr; +struct _xsltCompilerNodeInfo { + xsltCompilerNodeInfoPtr next; + xsltCompilerNodeInfoPtr prev; + xmlNodePtr node; + int depth; + xsltTemplatePtr templ; /* The owning template */ + int category; /* XSLT element, LR-element or + extension element */ + xsltStyleType type; + xsltElemPreCompPtr item; /* The compiled information */ + /* The current in-scope namespaces */ + xsltNsListContainerPtr inScopeNs; + /* The current excluded result namespaces */ + xsltPointerListPtr exclResultNs; + /* The current extension instruction namespaces */ + xsltPointerListPtr extElemNs; + + /* The current info for literal result elements. */ + xsltStyleItemLRElementInfoPtr litResElemInfo; + /* + * Set to 1 if in-scope namespaces changed, + * or excluded result namespaces changed, + * or extension element namespaces changed. + * This will trigger creation of new infos + * for literal result elements. + */ + int nsChanged; + int preserveWhitespace; + int stripWhitespace; + int isRoot; /* whether this is the stylesheet's root node */ + int forwardsCompat; /* whether forwards-compatible mode is enabled */ + /* whether the content of an extension element was processed */ + int extContentHandled; + /* the type of the current child */ + xsltStyleType curChildType; +}; + +/** + * XSLT_CCTXT: + * + * get pointer to compiler context + */ +#define XSLT_CCTXT(style) ((xsltCompilerCtxtPtr) style->compCtxt) + +typedef enum { + XSLT_ERROR_SEVERITY_ERROR = 0, + XSLT_ERROR_SEVERITY_WARNING +} xsltErrorSeverityType; + +typedef struct _xsltCompilerCtxt xsltCompilerCtxt; +typedef xsltCompilerCtxt *xsltCompilerCtxtPtr; +struct _xsltCompilerCtxt { + void *errorCtxt; /* user specific error context */ + /* + * used for error/warning reports; e.g. XSLT_ERROR_SEVERITY_WARNING */ + xsltErrorSeverityType errSeverity; + int warnings; /* TODO: number of warnings found at + compilation */ + int errors; /* TODO: number of errors found at + compilation */ + xmlDictPtr dict; + xsltStylesheetPtr style; + int simplified; /* whether this is a simplified stylesheet */ + /* TODO: structured/unstructured error contexts. */ + int depth; /* Current depth of processing */ + + xsltCompilerNodeInfoPtr inode; + xsltCompilerNodeInfoPtr inodeList; + xsltCompilerNodeInfoPtr inodeLast; + xsltPointerListPtr tmpList; /* Used for various purposes */ + /* + * The XSLT version as specified by the stylesheet's root element. + */ + int isInclude; + int hasForwardsCompat; /* whether forwards-compatible mode was used + in a parsing episode */ + int maxNodeInfos; /* TEMP TODO: just for the interest */ + int maxLREs; /* TEMP TODO: just for the interest */ + /* + * In order to keep the old behaviour, applying strict rules of + * the spec can be turned off. This has effect only on special + * mechanisms like whitespace-stripping in the stylesheet. + */ + int strict; + xsltPrincipalStylesheetDataPtr psData; + xsltStyleItemUknownPtr unknownItem; + int hasNsAliases; /* Indicator if there was an xsl:namespace-alias. */ + xsltNsAliasPtr nsAliases; + xsltVarInfoPtr ivars; /* Storage of local in-scope variables/params. */ + xsltVarInfoPtr ivar; /* topmost local variable/param. */ +}; + +#else /* XSLT_REFACTORED */ +/* +* The old structures before refactoring. +*/ + +/** + * _xsltStylePreComp: + * + * The in-memory structure corresponding to XSLT stylesheet constructs + * precomputed data. + */ +struct _xsltStylePreComp { + xsltElemPreCompPtr next; /* chained list */ + xsltStyleType type; /* type of the element */ + xsltTransformFunction func; /* handling function */ + xmlNodePtr inst; /* the instruction */ + + /* + * Pre computed values. + */ + + const xmlChar *stype; /* sort */ + int has_stype; /* sort */ + int number; /* sort */ + const xmlChar *order; /* sort */ + int has_order; /* sort */ + int descending; /* sort */ + const xmlChar *lang; /* sort */ + int has_lang; /* sort */ + const xmlChar *case_order; /* sort */ + int lower_first; /* sort */ + + const xmlChar *use; /* copy, element */ + int has_use; /* copy, element */ + + int noescape; /* text */ + + const xmlChar *name; /* element, attribute, pi */ + int has_name; /* element, attribute, pi */ + const xmlChar *ns; /* element */ + int has_ns; /* element */ + + const xmlChar *mode; /* apply-templates */ + const xmlChar *modeURI; /* apply-templates */ + + const xmlChar *test; /* if */ + + xsltTemplatePtr templ; /* call-template */ + + const xmlChar *select; /* sort, copy-of, value-of, apply-templates */ + + int ver11; /* document */ + const xmlChar *filename; /* document URL */ + int has_filename; /* document */ + + xsltNumberData numdata; /* number */ + + xmlXPathCompExprPtr comp; /* a precompiled XPath expression */ + xmlNsPtr *nsList; /* the namespaces in scope */ + int nsNr; /* the number of namespaces in scope */ +}; + +#endif /* XSLT_REFACTORED */ + + +/* + * The in-memory structure corresponding to an XSLT Variable + * or Param. + */ +typedef struct _xsltStackElem xsltStackElem; +typedef xsltStackElem *xsltStackElemPtr; +struct _xsltStackElem { + struct _xsltStackElem *next;/* chained list */ + xsltStylePreCompPtr comp; /* the compiled form */ + int computed; /* was the evaluation done */ + const xmlChar *name; /* the local part of the name QName */ + const xmlChar *nameURI; /* the URI part of the name QName */ + const xmlChar *select; /* the eval string */ + xmlNodePtr tree; /* the sequence constructor if no eval + string or the location */ + xmlXPathObjectPtr value; /* The value if computed */ + xmlDocPtr fragment; /* The Result Tree Fragments (needed for XSLT 1.0) + which are bound to the variable's lifetime. */ + int level; /* the depth in the tree; + -1 if persistent (e.g. a given xsl:with-param) */ + xsltTransformContextPtr context; /* The transformation context; needed to cache + the variables */ + int flags; +}; + +#ifdef XSLT_REFACTORED + +struct _xsltPrincipalStylesheetData { + /* + * Namespace dictionary for ns-prefixes and ns-names: + * TODO: Shared between stylesheets, and XPath mechanisms. + * Not used yet. + */ + xmlDictPtr namespaceDict; + /* + * Global list of in-scope namespaces. + */ + xsltPointerListPtr inScopeNamespaces; + /* + * Global list of information for [xsl:]excluded-result-prefixes. + */ + xsltPointerListPtr exclResultNamespaces; + /* + * Global list of information for [xsl:]extension-element-prefixes. + */ + xsltPointerListPtr extElemNamespaces; + xsltEffectiveNsPtr effectiveNs; +#ifdef XSLT_REFACTORED_XSLT_NSCOMP + /* + * Namespace name map to get rid of string comparison of namespace names. + */ + xsltNsMapPtr nsMap; +#endif +}; + + +#endif +/* + * Note that we added a @compCtxt field to anchor an stylesheet compilation + * context, since, due to historical reasons, various compile-time function + * take only the stylesheet as argument and not a compilation context. + */ +struct _xsltStylesheet { + /* + * The stylesheet import relation is kept as a tree. + */ + struct _xsltStylesheet *parent; + struct _xsltStylesheet *next; + struct _xsltStylesheet *imports; + + xsltDocumentPtr docList; /* the include document list */ + + /* + * General data on the style sheet document. + */ + xmlDocPtr doc; /* the parsed XML stylesheet */ + xmlHashTablePtr stripSpaces;/* the hash table of the strip-space and + preserve space elements */ + int stripAll; /* strip-space * (1) preserve-space * (-1) */ + xmlHashTablePtr cdataSection;/* the hash table of the cdata-section */ + + /* + * Global variable or parameters. + */ + xsltStackElemPtr variables; /* linked list of param and variables */ + + /* + * Template descriptions. + */ + xsltTemplatePtr templates; /* the ordered list of templates */ + xmlHashTablePtr templatesHash; /* hash table or wherever compiled + templates information is stored */ + struct _xsltCompMatch *rootMatch; /* template based on / */ + struct _xsltCompMatch *keyMatch; /* template based on key() */ + struct _xsltCompMatch *elemMatch; /* template based on * */ + struct _xsltCompMatch *attrMatch; /* template based on @* */ + struct _xsltCompMatch *parentMatch; /* template based on .. */ + struct _xsltCompMatch *textMatch; /* template based on text() */ + struct _xsltCompMatch *piMatch; /* template based on + processing-instruction() */ + struct _xsltCompMatch *commentMatch; /* template based on comment() */ + + /* + * Namespace aliases. + * NOTE: Not used in the refactored code. + */ + xmlHashTablePtr nsAliases; /* the namespace alias hash tables */ + + /* + * Attribute sets. + */ + xmlHashTablePtr attributeSets;/* the attribute sets hash tables */ + + /* + * Namespaces. + * TODO: Eliminate this. + */ + xmlHashTablePtr nsHash; /* the set of namespaces in use: + ATTENTION: This is used for + execution of XPath expressions; unfortunately + it restricts the stylesheet to have distinct + prefixes. + TODO: We need to get rid of this. + */ + void *nsDefs; /* ATTENTION TODO: This is currently used to store + xsltExtDefPtr (in extensions.c) and + *not* xmlNsPtr. + */ + + /* + * Key definitions. + */ + void *keys; /* key definitions */ + + /* + * Output related stuff. + */ + xmlChar *method; /* the output method */ + xmlChar *methodURI; /* associated namespace if any */ + xmlChar *version; /* version string */ + xmlChar *encoding; /* encoding string */ + int omitXmlDeclaration; /* omit-xml-declaration = "yes" | "no" */ + + /* + * Number formatting. + */ + xsltDecimalFormatPtr decimalFormat; + int standalone; /* standalone = "yes" | "no" */ + xmlChar *doctypePublic; /* doctype-public string */ + xmlChar *doctypeSystem; /* doctype-system string */ + int indent; /* should output being indented */ + xmlChar *mediaType; /* media-type string */ + + /* + * Precomputed blocks. + */ + xsltElemPreCompPtr preComps;/* list of precomputed blocks */ + int warnings; /* number of warnings found at compilation */ + int errors; /* number of errors found at compilation */ + + xmlChar *exclPrefix; /* last excluded prefixes */ + xmlChar **exclPrefixTab; /* array of excluded prefixes */ + int exclPrefixNr; /* number of excluded prefixes in scope */ + int exclPrefixMax; /* size of the array */ + + void *_private; /* user defined data */ + + /* + * Extensions. + */ + xmlHashTablePtr extInfos; /* the extension data */ + int extrasNr; /* the number of extras required */ + + /* + * For keeping track of nested includes + */ + xsltDocumentPtr includes; /* points to last nested include */ + + /* + * dictionary: shared between stylesheet, context and documents. + */ + xmlDictPtr dict; + /* + * precompiled attribute value templates. + */ + void *attVTs; + /* + * if namespace-alias has an alias for the default stylesheet prefix + * NOTE: Not used in the refactored code. + */ + const xmlChar *defaultAlias; + /* + * bypass pre-processing (already done) (used in imports) + */ + int nopreproc; + /* + * all document text strings were internalized + */ + int internalized; + /* + * Literal Result Element as Stylesheet c.f. section 2.3 + */ + int literal_result; + /* + * The principal stylesheet + */ + xsltStylesheetPtr principal; +#ifdef XSLT_REFACTORED + /* + * Compilation context used during compile-time. + */ + xsltCompilerCtxtPtr compCtxt; /* TODO: Change this to (void *). */ + + xsltPrincipalStylesheetDataPtr principalData; +#endif + /* + * Forwards-compatible processing + */ + int forwards_compatible; + + xmlHashTablePtr namedTemplates; /* hash table of named templates */ + + xmlXPathContextPtr xpathCtxt; + + unsigned long opLimit; + unsigned long opCount; +}; + +typedef struct _xsltTransformCache xsltTransformCache; +typedef xsltTransformCache *xsltTransformCachePtr; +struct _xsltTransformCache { + xmlDocPtr RVT; + int nbRVT; + xsltStackElemPtr stackItems; + int nbStackItems; +#ifdef XSLT_DEBUG_PROFILE_CACHE + int dbgCachedRVTs; + int dbgReusedRVTs; + int dbgCachedVars; + int dbgReusedVars; +#endif +}; + +/* + * The in-memory structure corresponding to an XSLT Transformation. + */ +typedef enum { + XSLT_OUTPUT_XML = 0, + XSLT_OUTPUT_HTML, + XSLT_OUTPUT_TEXT +} xsltOutputType; + +typedef void * +(*xsltNewLocaleFunc)(const xmlChar *lang, int lowerFirst); +typedef void +(*xsltFreeLocaleFunc)(void *locale); +typedef xmlChar * +(*xsltGenSortKeyFunc)(void *locale, const xmlChar *lang); + +typedef enum { + XSLT_STATE_OK = 0, + XSLT_STATE_ERROR, + XSLT_STATE_STOPPED +} xsltTransformState; + +struct _xsltTransformContext { + xsltStylesheetPtr style; /* the stylesheet used */ + xsltOutputType type; /* the type of output */ + + xsltTemplatePtr templ; /* the current template */ + int templNr; /* Nb of templates in the stack */ + int templMax; /* Size of the templtes stack */ + xsltTemplatePtr *templTab; /* the template stack */ + + xsltStackElemPtr vars; /* the current variable list */ + int varsNr; /* Nb of variable list in the stack */ + int varsMax; /* Size of the variable list stack */ + xsltStackElemPtr *varsTab; /* the variable list stack */ + int varsBase; /* the var base for current templ */ + + /* + * Extensions + */ + xmlHashTablePtr extFunctions; /* the extension functions */ + xmlHashTablePtr extElements; /* the extension elements */ + xmlHashTablePtr extInfos; /* the extension data */ + + const xmlChar *mode; /* the current mode */ + const xmlChar *modeURI; /* the current mode URI */ + + xsltDocumentPtr docList; /* the document list */ + + xsltDocumentPtr document; /* the current source document; can be NULL if an RTF */ + xmlNodePtr node; /* the current node being processed */ + xmlNodeSetPtr nodeList; /* the current node list */ + /* xmlNodePtr current; the node */ + + xmlDocPtr output; /* the resulting document */ + xmlNodePtr insert; /* the insertion node */ + + xmlXPathContextPtr xpathCtxt; /* the XPath context */ + xsltTransformState state; /* the current state */ + + /* + * Global variables + */ + xmlHashTablePtr globalVars; /* the global variables and params */ + + xmlNodePtr inst; /* the instruction in the stylesheet */ + + int xinclude; /* should XInclude be processed */ + + const char * outputFile; /* the output URI if known */ + + int profile; /* is this run profiled */ + long prof; /* the current profiled value */ + int profNr; /* Nb of templates in the stack */ + int profMax; /* Size of the templtaes stack */ + long *profTab; /* the profile template stack */ + + void *_private; /* user defined data */ + + int extrasNr; /* the number of extras used */ + int extrasMax; /* the number of extras allocated */ + xsltRuntimeExtraPtr extras; /* extra per runtime information */ + + xsltDocumentPtr styleList; /* the stylesheet docs list */ + void * sec; /* the security preferences if any */ + + xmlGenericErrorFunc error; /* a specific error handler */ + void * errctx; /* context for the error handler */ + + xsltSortFunc sortfunc; /* a ctxt specific sort routine */ + + /* + * handling of temporary Result Value Tree + * (XSLT 1.0 term: "Result Tree Fragment") + */ + xmlDocPtr tmpRVT; /* list of RVT without persistance */ + xmlDocPtr persistRVT; /* list of persistant RVTs */ + int ctxtflags; /* context processing flags */ + + /* + * Speed optimization when coalescing text nodes + */ + const xmlChar *lasttext; /* last text node content */ + int lasttsize; /* last text node size */ + int lasttuse; /* last text node use */ + /* + * Per Context Debugging + */ + int debugStatus; /* the context level debug status */ + unsigned long* traceCode; /* pointer to the variable holding the mask */ + + int parserOptions; /* parser options xmlParserOption */ + + /* + * dictionary: shared between stylesheet, context and documents. + */ + xmlDictPtr dict; + xmlDocPtr tmpDoc; /* Obsolete; not used in the library. */ + /* + * all document text strings are internalized + */ + int internalized; + int nbKeys; + int hasTemplKeyPatterns; + xsltTemplatePtr currentTemplateRule; /* the Current Template Rule */ + xmlNodePtr initialContextNode; + xmlDocPtr initialContextDoc; + xsltTransformCachePtr cache; + void *contextVariable; /* the current variable item */ + xmlDocPtr localRVT; /* list of local tree fragments; will be freed when + the instruction which created the fragment + exits */ + xmlDocPtr localRVTBase; /* Obsolete */ + int keyInitLevel; /* Needed to catch recursive keys issues */ + int depth; /* Needed to catch recursions */ + int maxTemplateDepth; + int maxTemplateVars; + unsigned long opLimit; + unsigned long opCount; + int sourceDocDirty; + unsigned long currentId; /* For generate-id() */ + + xsltNewLocaleFunc newLocale; + xsltFreeLocaleFunc freeLocale; + xsltGenSortKeyFunc genSortKey; +}; + +/** + * CHECK_STOPPED: + * + * Macro to check if the XSLT processing should be stopped. + * Will return from the function. + */ +#define CHECK_STOPPED if (ctxt->state == XSLT_STATE_STOPPED) return; + +/** + * CHECK_STOPPEDE: + * + * Macro to check if the XSLT processing should be stopped. + * Will goto the error: label. + */ +#define CHECK_STOPPEDE if (ctxt->state == XSLT_STATE_STOPPED) goto error; + +/** + * CHECK_STOPPED0: + * + * Macro to check if the XSLT processing should be stopped. + * Will return from the function with a 0 value. + */ +#define CHECK_STOPPED0 if (ctxt->state == XSLT_STATE_STOPPED) return(0); + +/* + * The macro XML_CAST_FPTR is a hack to avoid a gcc warning about + * possible incompatibilities between function pointers and object + * pointers. It is defined in libxml/hash.h within recent versions + * of libxml2, but is put here for compatibility. + */ +#ifndef XML_CAST_FPTR +/** + * XML_CAST_FPTR: + * @fptr: pointer to a function + * + * Macro to do a casting from an object pointer to a + * function pointer without encountering a warning from + * gcc + * + * #define XML_CAST_FPTR(fptr) (*(void **)(&fptr)) + * This macro violated ISO C aliasing rules (gcc4 on s390 broke) + * so it is disabled now + */ + +#define XML_CAST_FPTR(fptr) fptr +#endif +/* + * Functions associated to the internal types +xsltDecimalFormatPtr xsltDecimalFormatGetByName(xsltStylesheetPtr sheet, + xmlChar *name); + */ +XSLTPUBFUN xsltStylesheetPtr XSLTCALL + xsltNewStylesheet (void); +XSLTPUBFUN xsltStylesheetPtr XSLTCALL + xsltParseStylesheetFile (const xmlChar* filename); +XSLTPUBFUN void XSLTCALL + xsltFreeStylesheet (xsltStylesheetPtr style); +XSLTPUBFUN int XSLTCALL + xsltIsBlank (xmlChar *str); +XSLTPUBFUN void XSLTCALL + xsltFreeStackElemList (xsltStackElemPtr elem); +XSLTPUBFUN xsltDecimalFormatPtr XSLTCALL + xsltDecimalFormatGetByName(xsltStylesheetPtr style, + xmlChar *name); +XSLTPUBFUN xsltDecimalFormatPtr XSLTCALL + xsltDecimalFormatGetByQName(xsltStylesheetPtr style, + const xmlChar *nsUri, + const xmlChar *name); + +XSLTPUBFUN xsltStylesheetPtr XSLTCALL + xsltParseStylesheetProcess(xsltStylesheetPtr ret, + xmlDocPtr doc); +XSLTPUBFUN void XSLTCALL + xsltParseStylesheetOutput(xsltStylesheetPtr style, + xmlNodePtr cur); +XSLTPUBFUN xsltStylesheetPtr XSLTCALL + xsltParseStylesheetDoc (xmlDocPtr doc); +XSLTPUBFUN xsltStylesheetPtr XSLTCALL + xsltParseStylesheetImportedDoc(xmlDocPtr doc, + xsltStylesheetPtr style); +XSLTPUBFUN int XSLTCALL + xsltParseStylesheetUser(xsltStylesheetPtr style, + xmlDocPtr doc); +XSLTPUBFUN xsltStylesheetPtr XSLTCALL + xsltLoadStylesheetPI (xmlDocPtr doc); +XSLTPUBFUN void XSLTCALL + xsltNumberFormat (xsltTransformContextPtr ctxt, + xsltNumberDataPtr data, + xmlNodePtr node); +XSLTPUBFUN xmlXPathError XSLTCALL + xsltFormatNumberConversion(xsltDecimalFormatPtr self, + xmlChar *format, + double number, + xmlChar **result); + +XSLTPUBFUN void XSLTCALL + xsltParseTemplateContent(xsltStylesheetPtr style, + xmlNodePtr templ); +XSLTPUBFUN int XSLTCALL + xsltAllocateExtra (xsltStylesheetPtr style); +XSLTPUBFUN int XSLTCALL + xsltAllocateExtraCtxt (xsltTransformContextPtr ctxt); +/* + * Extra functions for Result Value Trees + */ +XSLTPUBFUN xmlDocPtr XSLTCALL + xsltCreateRVT (xsltTransformContextPtr ctxt); +XSLTPUBFUN int XSLTCALL + xsltRegisterTmpRVT (xsltTransformContextPtr ctxt, + xmlDocPtr RVT); +XSLTPUBFUN int XSLTCALL + xsltRegisterLocalRVT (xsltTransformContextPtr ctxt, + xmlDocPtr RVT); +XSLTPUBFUN int XSLTCALL + xsltRegisterPersistRVT (xsltTransformContextPtr ctxt, + xmlDocPtr RVT); +XSLTPUBFUN int XSLTCALL + xsltExtensionInstructionResultRegister( + xsltTransformContextPtr ctxt, + xmlXPathObjectPtr obj); +XSLTPUBFUN int XSLTCALL + xsltExtensionInstructionResultFinalize( + xsltTransformContextPtr ctxt); +XSLTPUBFUN int XSLTCALL + xsltFlagRVTs( + xsltTransformContextPtr ctxt, + xmlXPathObjectPtr obj, + int val); +XSLTPUBFUN void XSLTCALL + xsltFreeRVTs (xsltTransformContextPtr ctxt); +XSLTPUBFUN void XSLTCALL + xsltReleaseRVT (xsltTransformContextPtr ctxt, + xmlDocPtr RVT); +/* + * Extra functions for Attribute Value Templates + */ +XSLTPUBFUN void XSLTCALL + xsltCompileAttr (xsltStylesheetPtr style, + xmlAttrPtr attr); +XSLTPUBFUN xmlChar * XSLTCALL + xsltEvalAVT (xsltTransformContextPtr ctxt, + void *avt, + xmlNodePtr node); +XSLTPUBFUN void XSLTCALL + xsltFreeAVTList (void *avt); + +/* + * Extra function for successful xsltCleanupGlobals / xsltInit sequence. + */ + +XSLTPUBFUN void XSLTCALL + xsltUninit (void); + +/************************************************************************ + * * + * Compile-time functions for *internal* use only * + * * + ************************************************************************/ + +#ifdef XSLT_REFACTORED +XSLTPUBFUN void XSLTCALL + xsltParseSequenceConstructor( + xsltCompilerCtxtPtr cctxt, + xmlNodePtr start); +XSLTPUBFUN int XSLTCALL + xsltParseAnyXSLTElem (xsltCompilerCtxtPtr cctxt, + xmlNodePtr elem); +#ifdef XSLT_REFACTORED_XSLT_NSCOMP +XSLTPUBFUN int XSLTCALL + xsltRestoreDocumentNamespaces( + xsltNsMapPtr ns, + xmlDocPtr doc); +#endif +#endif /* XSLT_REFACTORED */ + +/************************************************************************ + * * + * Transformation-time functions for *internal* use only * + * * + ************************************************************************/ +XSLTPUBFUN int XSLTCALL + xsltInitCtxtKey (xsltTransformContextPtr ctxt, + xsltDocumentPtr doc, + xsltKeyDefPtr keyd); +XSLTPUBFUN int XSLTCALL + xsltInitAllDocKeys (xsltTransformContextPtr ctxt); +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLT_H__ */ + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltconfig.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltconfig.h new file mode 100644 index 00000000..7e2c79ae --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltconfig.h @@ -0,0 +1,146 @@ +/* + * Summary: compile-time version information for the XSLT engine + * Description: compile-time version information for the XSLT engine + * this module is autogenerated. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLTCONFIG_H__ +#define __XML_XSLTCONFIG_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * LIBXSLT_DOTTED_VERSION: + * + * the version string like "1.2.3" + */ +#define LIBXSLT_DOTTED_VERSION "1.1.42" + +/** + * LIBXSLT_VERSION: + * + * the version number: 1.2.3 value is 10203 + */ +#define LIBXSLT_VERSION 10142 + +/** + * LIBXSLT_VERSION_STRING: + * + * the version number string, 1.2.3 value is "10203" + */ +#define LIBXSLT_VERSION_STRING "10142" + +/** + * LIBXSLT_VERSION_EXTRA: + * + * extra version information, used to show a Git commit description + */ +#define LIBXSLT_VERSION_EXTRA "" + +/** + * WITH_XSLT_DEBUG: + * + * Activate the compilation of the debug reporting. Speed penalty + * is insignifiant and being able to run xsltpoc -v is useful. On + * by default unless --without-debug is passed to configure + */ +#if 1 +#define WITH_XSLT_DEBUG +#endif + +/** + * XSLT_NEED_TRIO: + * + * should be activated if the existing libc library lacks some of the + * string formatting function, in that case reuse the Trio ones already + * compiled in the libxml2 library. + */ + +#if 0 +#define XSLT_NEED_TRIO +#endif +#ifdef __VMS +#define HAVE_SYS_STAT_H 1 +#ifndef XSLT_NEED_TRIO +#define XSLT_NEED_TRIO +#endif +#endif + +#ifdef XSLT_NEED_TRIO +#define TRIO_REPLACE_STDIO +#endif + +/** + * WITH_XSLT_DEBUGGER: + * + * Activate the compilation of the debugger support. Speed penalty + * is insignifiant. + * On by default unless --without-debugger is passed to configure + */ +#if 1 +#ifndef WITH_DEBUGGER +#define WITH_DEBUGGER +#endif +#endif + +/** + * WITH_PROFILER: + * + * Activate the compilation of the profiler. Speed penalty + * is insignifiant. + * On by default unless --without-profiler is passed to configure + */ +#if 1 +#ifndef WITH_PROFILER +#define WITH_PROFILER +#endif +#endif + +/** + * WITH_MODULES: + * + * Whether module support is configured into libxslt + * Note: no default module path for win32 platforms + */ +#if 0 +#ifndef WITH_MODULES +#define WITH_MODULES +#endif +#define LIBXSLT_DEFAULT_PLUGINS_PATH() "/project/build/tmp/libxml2/lib/libxslt-plugins" +#endif + +/** + * LIBXSLT_ATTR_FORMAT: + * + * This macro is used to indicate to GCC the parameters are printf-like + */ +#ifdef __GNUC__ +#define LIBXSLT_ATTR_FORMAT(fmt,args) __attribute__((__format__(__printf__,fmt,args))) +#else +#define LIBXSLT_ATTR_FORMAT(fmt,args) +#endif + +/** + * LIBXSLT_PUBLIC: + * + * This macro is used to declare PUBLIC variables for Cygwin and for MSC on Windows + */ +#if !defined LIBXSLT_PUBLIC +#if (defined(__CYGWIN__) || defined _MSC_VER) && !defined IN_LIBXSLT && !defined LIBXSLT_STATIC +#define LIBXSLT_PUBLIC __declspec(dllimport) +#else +#define LIBXSLT_PUBLIC +#endif +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLTCONFIG_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltexports.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltexports.h new file mode 100644 index 00000000..95c352fe --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltexports.h @@ -0,0 +1,64 @@ +/* + * Summary: macros for marking symbols as exportable/importable. + * Description: macros for marking symbols as exportable/importable. + * + * Copy: See Copyright for the status of this software. + */ + +#ifndef __XSLT_EXPORTS_H__ +#define __XSLT_EXPORTS_H__ + +#if defined(_WIN32) || defined(__CYGWIN__) +/** DOC_DISABLE */ + +#ifdef LIBXSLT_STATIC + #define XSLTPUBLIC +#elif defined(IN_LIBXSLT) + #define XSLTPUBLIC __declspec(dllexport) +#else + #define XSLTPUBLIC __declspec(dllimport) +#endif + +#define XSLTCALL __cdecl + +/** DOC_ENABLE */ +#else /* not Windows */ + +/** + * XSLTPUBLIC: + * + * Macro which declares a public symbol + */ +#define XSLTPUBLIC + +/** + * XSLTCALL: + * + * Macro which declares the calling convention for exported functions + */ +#define XSLTCALL + +#endif /* platform switch */ + +/* + * XSLTPUBFUN: + * + * Macro which declares an exportable function + */ +#define XSLTPUBFUN XSLTPUBLIC + +/** + * XSLTPUBVAR: + * + * Macro which declares an exportable variable + */ +#define XSLTPUBVAR XSLTPUBLIC extern + +/* Compatibility */ +#if !defined(LIBXSLT_PUBLIC) +#define LIBXSLT_PUBLIC XSLTPUBVAR +#endif + +#endif /* __XSLT_EXPORTS_H__ */ + + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltlocale.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltlocale.h new file mode 100644 index 00000000..c8be58d3 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltlocale.h @@ -0,0 +1,44 @@ +/* + * Summary: Locale handling + * Description: Interfaces for locale handling. Needed for language dependent + * sorting. + * + * Copy: See Copyright for the status of this software. + * + * Author: Nick Wellnhofer + */ + +#ifndef __XML_XSLTLOCALE_H__ +#define __XML_XSLTLOCALE_H__ + +#include <libxml/xmlstring.h> +#include "xsltexports.h" + +#ifdef __cplusplus +extern "C" { +#endif + +XSLTPUBFUN void * XSLTCALL + xsltNewLocale (const xmlChar *langName, + int lowerFirst); +XSLTPUBFUN void XSLTCALL + xsltFreeLocale (void *locale); +XSLTPUBFUN xmlChar * XSLTCALL + xsltStrxfrm (void *locale, + const xmlChar *string); +XSLTPUBFUN void XSLTCALL + xsltFreeLocales (void); + +/* Backward compatibility */ +typedef void *xsltLocale; +typedef xmlChar xsltLocaleChar; +XSLTPUBFUN int XSLTCALL + xsltLocaleStrcmp (void *locale, + const xmlChar *str1, + const xmlChar *str2); + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLTLOCALE_H__ */ diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltutils.h b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltutils.h new file mode 100644 index 00000000..2514774b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/libxslt/xsltutils.h @@ -0,0 +1,343 @@ +/* + * Summary: set of utilities for the XSLT engine + * Description: interfaces for the utilities module of the XSLT engine. + * things like message handling, profiling, and other + * generally useful routines. + * + * Copy: See Copyright for the status of this software. + * + * Author: Daniel Veillard + */ + +#ifndef __XML_XSLTUTILS_H__ +#define __XML_XSLTUTILS_H__ + +#include <libxslt/xsltconfig.h> +#include <libxml/xpath.h> +#include <libxml/dict.h> +#include <libxml/xmlerror.h> +#include "xsltexports.h" +#include "xsltInternals.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * XSLT_TODO: + * + * Macro to flag unimplemented blocks. + */ +#define XSLT_TODO \ + xsltGenericError(xsltGenericErrorContext, \ + "Unimplemented block at %s:%d\n", \ + __FILE__, __LINE__); + +/** + * XSLT_STRANGE: + * + * Macro to flag that a problem was detected internally. + */ +#define XSLT_STRANGE \ + xsltGenericError(xsltGenericErrorContext, \ + "Internal error at %s:%d\n", \ + __FILE__, __LINE__); + +/** + * IS_XSLT_ELEM: + * + * Checks that the element pertains to XSLT namespace. + */ +#define IS_XSLT_ELEM(n) \ + (((n) != NULL) && ((n)->type == XML_ELEMENT_NODE) && \ + ((n)->ns != NULL) && (xmlStrEqual((n)->ns->href, XSLT_NAMESPACE))) + +/** + * IS_XSLT_NAME: + * + * Checks the value of an element in XSLT namespace. + */ +#define IS_XSLT_NAME(n, val) \ + (xmlStrEqual((n)->name, (const xmlChar *) (val))) + +/** + * IS_XSLT_REAL_NODE: + * + * Check that a node is a 'real' one: document, element, text or attribute. + */ +#define IS_XSLT_REAL_NODE(n) \ + (((n) != NULL) && \ + (((n)->type == XML_ELEMENT_NODE) || \ + ((n)->type == XML_TEXT_NODE) || \ + ((n)->type == XML_CDATA_SECTION_NODE) || \ + ((n)->type == XML_ATTRIBUTE_NODE) || \ + ((n)->type == XML_DOCUMENT_NODE) || \ + ((n)->type == XML_HTML_DOCUMENT_NODE) || \ + ((n)->type == XML_COMMENT_NODE) || \ + ((n)->type == XML_PI_NODE))) + +/* + * Our own version of namespaced attributes lookup. + */ +XSLTPUBFUN xmlChar * XSLTCALL + xsltGetNsProp (xmlNodePtr node, + const xmlChar *name, + const xmlChar *nameSpace); +XSLTPUBFUN const xmlChar * XSLTCALL + xsltGetCNsProp (xsltStylesheetPtr style, + xmlNodePtr node, + const xmlChar *name, + const xmlChar *nameSpace); +XSLTPUBFUN int XSLTCALL + xsltGetUTF8Char (const unsigned char *utf, + int *len); +#ifdef IN_LIBXSLT +/** DOC_DISABLE */ +XSLTPUBFUN int XSLTCALL + xsltGetUTF8CharZ (const unsigned char *utf, + int *len); +/** DOC_ENABLE */ +#endif + +/* + * XSLT Debug Tracing Tracing Types + */ +typedef enum { + XSLT_TRACE_ALL = -1, + XSLT_TRACE_NONE = 0, + XSLT_TRACE_COPY_TEXT = 1<<0, + XSLT_TRACE_PROCESS_NODE = 1<<1, + XSLT_TRACE_APPLY_TEMPLATE = 1<<2, + XSLT_TRACE_COPY = 1<<3, + XSLT_TRACE_COMMENT = 1<<4, + XSLT_TRACE_PI = 1<<5, + XSLT_TRACE_COPY_OF = 1<<6, + XSLT_TRACE_VALUE_OF = 1<<7, + XSLT_TRACE_CALL_TEMPLATE = 1<<8, + XSLT_TRACE_APPLY_TEMPLATES = 1<<9, + XSLT_TRACE_CHOOSE = 1<<10, + XSLT_TRACE_IF = 1<<11, + XSLT_TRACE_FOR_EACH = 1<<12, + XSLT_TRACE_STRIP_SPACES = 1<<13, + XSLT_TRACE_TEMPLATES = 1<<14, + XSLT_TRACE_KEYS = 1<<15, + XSLT_TRACE_VARIABLES = 1<<16 +} xsltDebugTraceCodes; + +/** + * XSLT_TRACE: + * + * Control the type of xsl debugtrace messages emitted. + */ +#define XSLT_TRACE(ctxt,code,call) \ + if (ctxt->traceCode && (*(ctxt->traceCode) & code)) \ + call + +XSLTPUBFUN void XSLTCALL + xsltDebugSetDefaultTrace(xsltDebugTraceCodes val); +XSLTPUBFUN xsltDebugTraceCodes XSLTCALL + xsltDebugGetDefaultTrace(void); + +/* + * XSLT specific error and debug reporting functions. + */ +XSLTPUBVAR xmlGenericErrorFunc xsltGenericError; +XSLTPUBVAR void *xsltGenericErrorContext; +XSLTPUBVAR xmlGenericErrorFunc xsltGenericDebug; +XSLTPUBVAR void *xsltGenericDebugContext; + +XSLTPUBFUN void XSLTCALL + xsltPrintErrorContext (xsltTransformContextPtr ctxt, + xsltStylesheetPtr style, + xmlNodePtr node); +XSLTPUBFUN void XSLTCALL + xsltMessage (xsltTransformContextPtr ctxt, + xmlNodePtr node, + xmlNodePtr inst); +XSLTPUBFUN void XSLTCALL + xsltSetGenericErrorFunc (void *ctx, + xmlGenericErrorFunc handler); +XSLTPUBFUN void XSLTCALL + xsltSetGenericDebugFunc (void *ctx, + xmlGenericErrorFunc handler); +XSLTPUBFUN void XSLTCALL + xsltSetTransformErrorFunc (xsltTransformContextPtr ctxt, + void *ctx, + xmlGenericErrorFunc handler); +XSLTPUBFUN void XSLTCALL + xsltTransformError (xsltTransformContextPtr ctxt, + xsltStylesheetPtr style, + xmlNodePtr node, + const char *msg, + ...) LIBXSLT_ATTR_FORMAT(4,5); + +XSLTPUBFUN int XSLTCALL + xsltSetCtxtParseOptions (xsltTransformContextPtr ctxt, + int options); +/* + * Sorting. + */ + +XSLTPUBFUN void XSLTCALL + xsltDocumentSortFunction (xmlNodeSetPtr list); +XSLTPUBFUN void XSLTCALL + xsltSetSortFunc (xsltSortFunc handler); +XSLTPUBFUN void XSLTCALL + xsltSetCtxtSortFunc (xsltTransformContextPtr ctxt, + xsltSortFunc handler); +XSLTPUBFUN void XSLTCALL + xsltSetCtxtLocaleHandlers (xsltTransformContextPtr ctxt, + xsltNewLocaleFunc newLocale, + xsltFreeLocaleFunc freeLocale, + xsltGenSortKeyFunc genSortKey); +XSLTPUBFUN void XSLTCALL + xsltDefaultSortFunction (xsltTransformContextPtr ctxt, + xmlNodePtr *sorts, + int nbsorts); +XSLTPUBFUN void XSLTCALL + xsltDoSortFunction (xsltTransformContextPtr ctxt, + xmlNodePtr * sorts, + int nbsorts); +XSLTPUBFUN xmlXPathObjectPtr * XSLTCALL + xsltComputeSortResult (xsltTransformContextPtr ctxt, + xmlNodePtr sort); + +/* + * QNames handling. + */ + +XSLTPUBFUN const xmlChar * XSLTCALL + xsltSplitQName (xmlDictPtr dict, + const xmlChar *name, + const xmlChar **prefix); +XSLTPUBFUN const xmlChar * XSLTCALL + xsltGetQNameURI (xmlNodePtr node, + xmlChar **name); + +XSLTPUBFUN const xmlChar * XSLTCALL + xsltGetQNameURI2 (xsltStylesheetPtr style, + xmlNodePtr node, + const xmlChar **name); + +/* + * Output, reuse libxml I/O buffers. + */ +XSLTPUBFUN int XSLTCALL + xsltSaveResultTo (xmlOutputBufferPtr buf, + xmlDocPtr result, + xsltStylesheetPtr style); +XSLTPUBFUN int XSLTCALL + xsltSaveResultToFilename (const char *URI, + xmlDocPtr result, + xsltStylesheetPtr style, + int compression); +XSLTPUBFUN int XSLTCALL + xsltSaveResultToFile (FILE *file, + xmlDocPtr result, + xsltStylesheetPtr style); +XSLTPUBFUN int XSLTCALL + xsltSaveResultToFd (int fd, + xmlDocPtr result, + xsltStylesheetPtr style); +XSLTPUBFUN int XSLTCALL + xsltSaveResultToString (xmlChar **doc_txt_ptr, + int * doc_txt_len, + xmlDocPtr result, + xsltStylesheetPtr style); + +/* + * XPath interface + */ +XSLTPUBFUN xmlXPathCompExprPtr XSLTCALL + xsltXPathCompile (xsltStylesheetPtr style, + const xmlChar *str); +XSLTPUBFUN xmlXPathCompExprPtr XSLTCALL + xsltXPathCompileFlags (xsltStylesheetPtr style, + const xmlChar *str, + int flags); + +#ifdef IN_LIBXSLT +/** DOC_DISABLE */ +#define XSLT_SOURCE_NODE_MASK 15u +#define XSLT_SOURCE_NODE_HAS_KEY 1u +#define XSLT_SOURCE_NODE_HAS_ID 2u +int +xsltGetSourceNodeFlags(xmlNodePtr node); +int +xsltSetSourceNodeFlags(xsltTransformContextPtr ctxt, xmlNodePtr node, + int flags); +int +xsltClearSourceNodeFlags(xmlNodePtr node, int flags); +void ** +xsltGetPSVIPtr(xmlNodePtr cur); +/** DOC_ENABLE */ +#endif + +#ifdef WITH_PROFILER +/* + * Profiling. + */ +XSLTPUBFUN void XSLTCALL + xsltSaveProfiling (xsltTransformContextPtr ctxt, + FILE *output); +XSLTPUBFUN xmlDocPtr XSLTCALL + xsltGetProfileInformation (xsltTransformContextPtr ctxt); + +XSLTPUBFUN long XSLTCALL + xsltTimestamp (void); +XSLTPUBFUN void XSLTCALL + xsltCalibrateAdjust (long delta); +#endif + +/** + * XSLT_TIMESTAMP_TICS_PER_SEC: + * + * Sampling precision for profiling + */ +#define XSLT_TIMESTAMP_TICS_PER_SEC 100000l + +/* + * Hooks for the debugger. + */ + +typedef enum { + XSLT_DEBUG_NONE = 0, /* no debugging allowed */ + XSLT_DEBUG_INIT, + XSLT_DEBUG_STEP, + XSLT_DEBUG_STEPOUT, + XSLT_DEBUG_NEXT, + XSLT_DEBUG_STOP, + XSLT_DEBUG_CONT, + XSLT_DEBUG_RUN, + XSLT_DEBUG_RUN_RESTART, + XSLT_DEBUG_QUIT +} xsltDebugStatusCodes; + +XSLTPUBVAR int xslDebugStatus; + +typedef void (*xsltHandleDebuggerCallback) (xmlNodePtr cur, xmlNodePtr node, + xsltTemplatePtr templ, xsltTransformContextPtr ctxt); +typedef int (*xsltAddCallCallback) (xsltTemplatePtr templ, xmlNodePtr source); +typedef void (*xsltDropCallCallback) (void); + +XSLTPUBFUN int XSLTCALL + xsltGetDebuggerStatus (void); +#ifdef WITH_DEBUGGER +XSLTPUBFUN void XSLTCALL + xsltSetDebuggerStatus (int value); +XSLTPUBFUN int XSLTCALL + xsltSetDebuggerCallbacks (int no, void *block); +XSLTPUBFUN int XSLTCALL + xslAddCall (xsltTemplatePtr templ, + xmlNodePtr source); +XSLTPUBFUN void XSLTCALL + xslDropCall (void); +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* __XML_XSLTUTILS_H__ */ + + diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/lxml-version.h b/.venv/lib/python3.12/site-packages/lxml/includes/lxml-version.h new file mode 100644 index 00000000..101c7c91 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/lxml-version.h @@ -0,0 +1,3 @@ +#ifndef LXML_VERSION_STRING +#define LXML_VERSION_STRING "5.3.1" +#endif diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/relaxng.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/relaxng.pxd new file mode 100644 index 00000000..5ac96711 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/relaxng.pxd @@ -0,0 +1,64 @@ +from lxml.includes.tree cimport xmlDoc +from lxml.includes.xmlerror cimport xmlStructuredErrorFunc + +cdef extern from "libxml/relaxng.h" nogil: + ctypedef struct xmlRelaxNG + ctypedef struct xmlRelaxNGParserCtxt + + ctypedef struct xmlRelaxNGValidCtxt + + ctypedef enum xmlRelaxNGValidErr: + XML_RELAXNG_OK = 0 + XML_RELAXNG_ERR_MEMORY = 1 + XML_RELAXNG_ERR_TYPE = 2 + XML_RELAXNG_ERR_TYPEVAL = 3 + XML_RELAXNG_ERR_DUPID = 4 + XML_RELAXNG_ERR_TYPECMP = 5 + XML_RELAXNG_ERR_NOSTATE = 6 + XML_RELAXNG_ERR_NODEFINE = 7 + XML_RELAXNG_ERR_LISTEXTRA = 8 + XML_RELAXNG_ERR_LISTEMPTY = 9 + XML_RELAXNG_ERR_INTERNODATA = 10 + XML_RELAXNG_ERR_INTERSEQ = 11 + XML_RELAXNG_ERR_INTEREXTRA = 12 + XML_RELAXNG_ERR_ELEMNAME = 13 + XML_RELAXNG_ERR_ATTRNAME = 14 + XML_RELAXNG_ERR_ELEMNONS = 15 + XML_RELAXNG_ERR_ATTRNONS = 16 + XML_RELAXNG_ERR_ELEMWRONGNS = 17 + XML_RELAXNG_ERR_ATTRWRONGNS = 18 + XML_RELAXNG_ERR_ELEMEXTRANS = 19 + XML_RELAXNG_ERR_ATTREXTRANS = 20 + XML_RELAXNG_ERR_ELEMNOTEMPTY = 21 + XML_RELAXNG_ERR_NOELEM = 22 + XML_RELAXNG_ERR_NOTELEM = 23 + XML_RELAXNG_ERR_ATTRVALID = 24 + XML_RELAXNG_ERR_CONTENTVALID = 25 + XML_RELAXNG_ERR_EXTRACONTENT = 26 + XML_RELAXNG_ERR_INVALIDATTR = 27 + XML_RELAXNG_ERR_DATAELEM = 28 + XML_RELAXNG_ERR_VALELEM = 29 + XML_RELAXNG_ERR_LISTELEM = 30 + XML_RELAXNG_ERR_DATATYPE = 31 + XML_RELAXNG_ERR_VALUE = 32 + XML_RELAXNG_ERR_LIST = 33 + XML_RELAXNG_ERR_NOGRAMMAR = 34 + XML_RELAXNG_ERR_EXTRADATA = 35 + XML_RELAXNG_ERR_LACKDATA = 36 + XML_RELAXNG_ERR_INTERNAL = 37 + XML_RELAXNG_ERR_ELEMWRONG = 38 + XML_RELAXNG_ERR_TEXTWRONG = 39 + + cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema) + cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc) + cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt) + cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL) + cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc) + cdef void xmlRelaxNGFree(xmlRelaxNG* schema) + cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt) + cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt) + + cdef void xmlRelaxNGSetValidStructuredErrors( + xmlRelaxNGValidCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) + cdef void xmlRelaxNGSetParserStructuredErrors( + xmlRelaxNGParserCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/schematron.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/schematron.pxd new file mode 100644 index 00000000..181248af --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/schematron.pxd @@ -0,0 +1,34 @@ +from lxml.includes cimport xmlerror +from lxml.includes.tree cimport xmlDoc + +cdef extern from "libxml/schematron.h" nogil: + ctypedef struct xmlSchematron + ctypedef struct xmlSchematronParserCtxt + ctypedef struct xmlSchematronValidCtxt + + ctypedef enum xmlSchematronValidOptions: + XML_SCHEMATRON_OUT_QUIET = 1 # quiet no report + XML_SCHEMATRON_OUT_TEXT = 2 # build a textual report + XML_SCHEMATRON_OUT_XML = 4 # output SVRL + XML_SCHEMATRON_OUT_ERROR = 8 # output via xmlStructuredErrorFunc + XML_SCHEMATRON_OUT_FILE = 256 # output to a file descriptor + XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer + XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism + + cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt( + xmlDoc* doc) + cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt( + char* filename) nogil + cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt( + xmlSchematron* schema, int options) + + cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt) + cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt, + xmlDoc* instance) + + cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt) + cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt) + cdef void xmlSchematronFree(xmlSchematron* schema) + cdef void xmlSchematronSetValidStructuredErrors( + xmlSchematronValidCtxt* ctxt, + xmlerror.xmlStructuredErrorFunc error_func, void *data) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/tree.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/tree.pxd new file mode 100644 index 00000000..5e37d9d6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/tree.pxd @@ -0,0 +1,494 @@ +from libc cimport stdio +from libc.string cimport const_char, const_uchar + +cdef extern from "lxml-version.h": + # deprecated declaration, use etreepublic.pxd instead + cdef char* LXML_VERSION_STRING + +cdef extern from "libxml/xmlversion.h": + cdef const_char* xmlParserVersion + cdef int LIBXML_VERSION + +cdef extern from "libxml/xmlstring.h" nogil: + ctypedef unsigned char xmlChar + ctypedef const xmlChar const_xmlChar "const xmlChar" + cdef int xmlStrlen(const_xmlChar* str) + cdef xmlChar* xmlStrdup(const_xmlChar* cur) + cdef int xmlStrncmp(const_xmlChar* str1, const_xmlChar* str2, int length) + cdef int xmlStrcmp(const_xmlChar* str1, const_xmlChar* str2) + cdef int xmlStrcasecmp(const xmlChar *str1, const xmlChar *str2) + cdef const_xmlChar* xmlStrstr(const_xmlChar* str1, const_xmlChar* str2) + cdef const_xmlChar* xmlStrchr(const_xmlChar* str1, xmlChar ch) + cdef const_xmlChar* _xcstr "(const xmlChar*)PyBytes_AS_STRING" (object s) + +cdef extern from "libxml/encoding.h" nogil: + ctypedef enum xmlCharEncoding: + XML_CHAR_ENCODING_ERROR = -1 # No char encoding detected + XML_CHAR_ENCODING_NONE = 0 # No char encoding detected + XML_CHAR_ENCODING_UTF8 = 1 # UTF-8 + XML_CHAR_ENCODING_UTF16LE = 2 # UTF-16 little endian + XML_CHAR_ENCODING_UTF16BE = 3 # UTF-16 big endian + XML_CHAR_ENCODING_UCS4LE = 4 # UCS-4 little endian + XML_CHAR_ENCODING_UCS4BE = 5 # UCS-4 big endian + XML_CHAR_ENCODING_EBCDIC = 6 # EBCDIC uh! + XML_CHAR_ENCODING_UCS4_2143 = 7 # UCS-4 unusual ordering + XML_CHAR_ENCODING_UCS4_3412 = 8 # UCS-4 unusual ordering + XML_CHAR_ENCODING_UCS2 = 9 # UCS-2 + XML_CHAR_ENCODING_8859_1 = 10 # ISO-8859-1 ISO Latin 1 + XML_CHAR_ENCODING_8859_2 = 11 # ISO-8859-2 ISO Latin 2 + XML_CHAR_ENCODING_8859_3 = 12 # ISO-8859-3 + XML_CHAR_ENCODING_8859_4 = 13 # ISO-8859-4 + XML_CHAR_ENCODING_8859_5 = 14 # ISO-8859-5 + XML_CHAR_ENCODING_8859_6 = 15 # ISO-8859-6 + XML_CHAR_ENCODING_8859_7 = 16 # ISO-8859-7 + XML_CHAR_ENCODING_8859_8 = 17 # ISO-8859-8 + XML_CHAR_ENCODING_8859_9 = 18 # ISO-8859-9 + XML_CHAR_ENCODING_2022_JP = 19 # ISO-2022-JP + XML_CHAR_ENCODING_SHIFT_JIS = 20 # Shift_JIS + XML_CHAR_ENCODING_EUC_JP = 21 # EUC-JP + XML_CHAR_ENCODING_ASCII = 22 # pure ASCII + + ctypedef struct xmlCharEncodingHandler: + char* name + + cdef xmlCharEncodingHandler* xmlFindCharEncodingHandler(char* name) + cdef xmlCharEncodingHandler* xmlGetCharEncodingHandler( + xmlCharEncoding enc) + cdef int xmlCharEncCloseFunc(xmlCharEncodingHandler* handler) + cdef xmlCharEncoding xmlDetectCharEncoding(const_xmlChar* text, int len) + cdef const_char* xmlGetCharEncodingName(xmlCharEncoding enc) + cdef xmlCharEncoding xmlParseCharEncoding(char* name) + ctypedef int (*xmlCharEncodingOutputFunc)( + unsigned char *out_buf, int *outlen, const_uchar *in_buf, int *inlen) + +cdef extern from "libxml/chvalid.h" nogil: + cdef int xmlIsChar_ch(char c) + cdef int xmlIsCharQ(int ch) + +cdef extern from "libxml/hash.h": + ctypedef struct xmlHashTable + ctypedef void (*xmlHashScanner)(void* payload, void* data, const_xmlChar* name) noexcept # may require GIL! + void xmlHashScan(xmlHashTable* table, xmlHashScanner f, void* data) nogil + void* xmlHashLookup(xmlHashTable* table, const_xmlChar* name) nogil + ctypedef void (*xmlHashDeallocator)(void *payload, xmlChar *name) noexcept + cdef xmlHashTable* xmlHashCreate(int size) nogil + cdef xmlHashTable* xmlHashCreateDict(int size, xmlDict *dict) nogil + cdef int xmlHashSize(xmlHashTable* table) nogil + cdef void xmlHashFree(xmlHashTable* table, xmlHashDeallocator f) nogil + +cdef extern from * nogil: # actually "libxml/dict.h" + # libxml/dict.h appears to be broken to include in C + ctypedef struct xmlDict + cdef const_xmlChar* xmlDictLookup(xmlDict* dict, const_xmlChar* name, int len) + cdef const_xmlChar* xmlDictExists(xmlDict* dict, const_xmlChar* name, int len) + cdef int xmlDictOwns(xmlDict* dict, const_xmlChar* name) + cdef size_t xmlDictSize(xmlDict* dict) + +cdef extern from "libxml/tree.h" nogil: + ctypedef struct xmlDoc + ctypedef struct xmlAttr + ctypedef struct xmlNotationTable + + ctypedef enum xmlElementType: + XML_ELEMENT_NODE= 1 + XML_ATTRIBUTE_NODE= 2 + XML_TEXT_NODE= 3 + XML_CDATA_SECTION_NODE= 4 + XML_ENTITY_REF_NODE= 5 + XML_ENTITY_NODE= 6 + XML_PI_NODE= 7 + XML_COMMENT_NODE= 8 + XML_DOCUMENT_NODE= 9 + XML_DOCUMENT_TYPE_NODE= 10 + XML_DOCUMENT_FRAG_NODE= 11 + XML_NOTATION_NODE= 12 + XML_HTML_DOCUMENT_NODE= 13 + XML_DTD_NODE= 14 + XML_ELEMENT_DECL= 15 + XML_ATTRIBUTE_DECL= 16 + XML_ENTITY_DECL= 17 + XML_NAMESPACE_DECL= 18 + XML_XINCLUDE_START= 19 + XML_XINCLUDE_END= 20 + + ctypedef enum xmlElementTypeVal: + XML_ELEMENT_TYPE_UNDEFINED= 0 + XML_ELEMENT_TYPE_EMPTY= 1 + XML_ELEMENT_TYPE_ANY= 2 + XML_ELEMENT_TYPE_MIXED= 3 + XML_ELEMENT_TYPE_ELEMENT= 4 + + ctypedef enum xmlElementContentType: + XML_ELEMENT_CONTENT_PCDATA= 1 + XML_ELEMENT_CONTENT_ELEMENT= 2 + XML_ELEMENT_CONTENT_SEQ= 3 + XML_ELEMENT_CONTENT_OR= 4 + + ctypedef enum xmlElementContentOccur: + XML_ELEMENT_CONTENT_ONCE= 1 + XML_ELEMENT_CONTENT_OPT= 2 + XML_ELEMENT_CONTENT_MULT= 3 + XML_ELEMENT_CONTENT_PLUS= 4 + + ctypedef enum xmlAttributeType: + XML_ATTRIBUTE_CDATA = 1 + XML_ATTRIBUTE_ID= 2 + XML_ATTRIBUTE_IDREF= 3 + XML_ATTRIBUTE_IDREFS= 4 + XML_ATTRIBUTE_ENTITY= 5 + XML_ATTRIBUTE_ENTITIES= 6 + XML_ATTRIBUTE_NMTOKEN= 7 + XML_ATTRIBUTE_NMTOKENS= 8 + XML_ATTRIBUTE_ENUMERATION= 9 + XML_ATTRIBUTE_NOTATION= 10 + + ctypedef enum xmlAttributeDefault: + XML_ATTRIBUTE_NONE= 1 + XML_ATTRIBUTE_REQUIRED= 2 + XML_ATTRIBUTE_IMPLIED= 3 + XML_ATTRIBUTE_FIXED= 4 + + ctypedef enum xmlEntityType: + XML_INTERNAL_GENERAL_ENTITY= 1 + XML_EXTERNAL_GENERAL_PARSED_ENTITY= 2 + XML_EXTERNAL_GENERAL_UNPARSED_ENTITY= 3 + XML_INTERNAL_PARAMETER_ENTITY= 4 + XML_EXTERNAL_PARAMETER_ENTITY= 5 + XML_INTERNAL_PREDEFINED_ENTITY= 6 + + ctypedef enum xmlDocProperties: + XML_DOC_WELLFORMED = 1 # /* document is XML well formed */ + XML_DOC_NSVALID = 2 # /* document is Namespace valid */ + XML_DOC_OLD10 = 4 # /* parsed with old XML-1.0 parser */ + XML_DOC_DTDVALID = 8 # /* DTD validation was successful */ + XML_DOC_XINCLUDE = 16 # /* XInclude substitution was done */ + XML_DOC_USERBUILT = 32 # /* Document was built using the API + # and not by parsing an instance */ + XML_DOC_INTERNAL = 64 # /* built for internal processing */ + XML_DOC_HTML = 128 # /* parsed or built HTML document */ + + ctypedef struct xmlNs: + const_xmlChar* href + const_xmlChar* prefix + xmlNs* next + + ctypedef struct xmlNode: + void* _private + xmlElementType type + const_xmlChar* name + xmlNode* children + xmlNode* last + xmlNode* parent + xmlNode* next + xmlNode* prev + xmlDoc* doc + xmlChar* content + xmlAttr* properties + xmlNs* ns + xmlNs* nsDef + unsigned short line + + ctypedef struct xmlElementContent: + xmlElementContentType type + xmlElementContentOccur ocur + const_xmlChar *name + xmlElementContent *c1 + xmlElementContent *c2 + xmlElementContent *parent + const_xmlChar *prefix + + ctypedef struct xmlEnumeration: + xmlEnumeration *next + const_xmlChar *name + + ctypedef struct xmlAttribute: + void* _private + xmlElementType type + const_xmlChar* name + xmlNode* children + xmlNode* last + xmlDtd* parent + xmlNode* next + xmlNode* prev + xmlDoc* doc + xmlAttribute* nexth + xmlAttributeType atype + xmlAttributeDefault def_ "def" + const_xmlChar* defaultValue + xmlEnumeration* tree + const_xmlChar* prefix + const_xmlChar* elem + + ctypedef struct xmlElement: + void* _private + xmlElementType type + const_xmlChar* name + xmlNode* children + xmlNode* last + xmlNode* parent + xmlNode* next + xmlNode* prev + xmlDoc* doc + xmlElementTypeVal etype + xmlElementContent* content + xmlAttribute* attributes + const_xmlChar* prefix + void *contModel + + ctypedef struct xmlEntity: + void* _private + xmlElementType type + const_xmlChar* name + xmlNode* children + xmlNode* last + xmlDtd* parent + xmlNode* next + xmlNode* prev + xmlDoc* doc + xmlChar* orig + xmlChar* content + int length + xmlEntityType etype + const_xmlChar* ExternalID + const_xmlChar* SystemID + xmlEntity* nexte + const_xmlChar* URI + int owner + int checked + + ctypedef struct xmlDtd: + const_xmlChar* name + const_xmlChar* ExternalID + const_xmlChar* SystemID + void* notations + void* entities + void* pentities + void* attributes + void* elements + xmlNode* children + xmlNode* last + xmlDoc* doc + + ctypedef struct xmlDoc: + xmlElementType type + char* name + xmlNode* children + xmlNode* last + xmlNode* parent + xmlNode* next + xmlNode* prev + xmlDoc* doc + xmlDict* dict + xmlHashTable* ids + int standalone + const_xmlChar* version + const_xmlChar* encoding + const_xmlChar* URL + void* _private + xmlDtd* intSubset + xmlDtd* extSubset + int properties + + ctypedef struct xmlAttr: + void* _private + xmlElementType type + const_xmlChar* name + xmlNode* children + xmlNode* last + xmlNode* parent + xmlAttr* next + xmlAttr* prev + xmlDoc* doc + xmlNs* ns + xmlAttributeType atype + + ctypedef struct xmlID: + const_xmlChar* value + const_xmlChar* name + xmlAttr* attr + xmlDoc* doc + + ctypedef struct xmlBuffer + + ctypedef struct xmlBuf # new in libxml2 2.9 + + ctypedef struct xmlOutputBuffer: + xmlBuf* buffer + xmlBuf* conv + int error + + const_xmlChar* XML_XML_NAMESPACE + + cdef void xmlFreeDoc(xmlDoc* cur) + cdef void xmlFreeDtd(xmlDtd* cur) + cdef void xmlFreeNode(xmlNode* cur) + cdef void xmlFreeNsList(xmlNs* ns) + cdef void xmlFreeNs(xmlNs* ns) + cdef void xmlFree(void* buf) + + cdef xmlNode* xmlNewNode(xmlNs* ns, const_xmlChar* name) + cdef xmlNode* xmlNewDocText(xmlDoc* doc, const_xmlChar* content) + cdef xmlNode* xmlNewDocComment(xmlDoc* doc, const_xmlChar* content) + cdef xmlNode* xmlNewDocPI(xmlDoc* doc, const_xmlChar* name, const_xmlChar* content) + cdef xmlNode* xmlNewReference(xmlDoc* doc, const_xmlChar* name) + cdef xmlNode* xmlNewCDataBlock(xmlDoc* doc, const_xmlChar* text, int len) + cdef xmlNs* xmlNewNs(xmlNode* node, const_xmlChar* href, const_xmlChar* prefix) + cdef xmlNode* xmlAddChild(xmlNode* parent, xmlNode* cur) + cdef xmlNode* xmlReplaceNode(xmlNode* old, xmlNode* cur) + cdef xmlNode* xmlAddPrevSibling(xmlNode* cur, xmlNode* elem) + cdef xmlNode* xmlAddNextSibling(xmlNode* cur, xmlNode* elem) + cdef xmlNode* xmlNewDocNode(xmlDoc* doc, xmlNs* ns, + const_xmlChar* name, const_xmlChar* content) + cdef xmlDoc* xmlNewDoc(const_xmlChar* version) + cdef xmlAttr* xmlNewProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) + cdef xmlAttr* xmlNewNsProp(xmlNode* node, xmlNs* ns, + const_xmlChar* name, const_xmlChar* value) + cdef xmlChar* xmlGetNoNsProp(xmlNode* node, const_xmlChar* name) + cdef xmlChar* xmlGetNsProp(xmlNode* node, const_xmlChar* name, const_xmlChar* nameSpace) + cdef void xmlSetNs(xmlNode* node, xmlNs* ns) + cdef xmlAttr* xmlSetProp(xmlNode* node, const_xmlChar* name, const_xmlChar* value) + cdef xmlAttr* xmlSetNsProp(xmlNode* node, xmlNs* ns, + const_xmlChar* name, const_xmlChar* value) + cdef int xmlRemoveID(xmlDoc* doc, xmlAttr* cur) + cdef int xmlRemoveProp(xmlAttr* cur) + cdef void xmlFreePropList(xmlAttr* cur) + cdef xmlChar* xmlGetNodePath(xmlNode* node) + cdef void xmlDocDumpMemory(xmlDoc* cur, char** mem, int* size) + cdef void xmlDocDumpMemoryEnc(xmlDoc* cur, char** mem, int* size, + char* encoding) + cdef int xmlSaveFileTo(xmlOutputBuffer* out, xmlDoc* cur, + char* encoding) + + cdef void xmlUnlinkNode(xmlNode* cur) + cdef xmlNode* xmlDocSetRootElement(xmlDoc* doc, xmlNode* root) + cdef xmlNode* xmlDocGetRootElement(xmlDoc* doc) + cdef void xmlSetTreeDoc(xmlNode* tree, xmlDoc* doc) + cdef xmlAttr* xmlHasProp(xmlNode* node, const_xmlChar* name) + cdef xmlAttr* xmlHasNsProp(xmlNode* node, const_xmlChar* name, const_xmlChar* nameSpace) + cdef xmlChar* xmlNodeGetContent(xmlNode* cur) + cdef int xmlNodeBufGetContent(xmlBuffer* buffer, xmlNode* cur) + cdef xmlNs* xmlSearchNs(xmlDoc* doc, xmlNode* node, const_xmlChar* prefix) + cdef xmlNs* xmlSearchNsByHref(xmlDoc* doc, xmlNode* node, const_xmlChar* href) + cdef int xmlIsBlankNode(xmlNode* node) + cdef long xmlGetLineNo(xmlNode* node) + cdef void xmlElemDump(stdio.FILE* f, xmlDoc* doc, xmlNode* cur) + cdef void xmlNodeDumpOutput(xmlOutputBuffer* buf, + xmlDoc* doc, xmlNode* cur, int level, + int format, const_char* encoding) + cdef void xmlBufAttrSerializeTxtContent(xmlOutputBuffer *buf, xmlDoc *doc, + xmlAttr *attr, const_xmlChar *string) + cdef void xmlNodeSetName(xmlNode* cur, const_xmlChar* name) + cdef void xmlNodeSetContent(xmlNode* cur, const_xmlChar* content) + cdef xmlDtd* xmlCopyDtd(xmlDtd* dtd) + cdef xmlDoc* xmlCopyDoc(xmlDoc* doc, int recursive) + cdef xmlNode* xmlCopyNode(xmlNode* node, int extended) + cdef xmlNode* xmlDocCopyNode(xmlNode* node, xmlDoc* doc, int extended) + cdef int xmlReconciliateNs(xmlDoc* doc, xmlNode* tree) + cdef xmlNs* xmlNewReconciliedNs(xmlDoc* doc, xmlNode* tree, xmlNs* ns) + cdef xmlBuffer* xmlBufferCreate() + cdef void xmlBufferWriteChar(xmlBuffer* buf, char* string) + cdef void xmlBufferFree(xmlBuffer* buf) + cdef const_xmlChar* xmlBufferContent(xmlBuffer* buf) + cdef int xmlBufferLength(xmlBuffer* buf) + cdef const_xmlChar* xmlBufContent(xmlBuf* buf) # new in libxml2 2.9 + cdef size_t xmlBufUse(xmlBuf* buf) # new in libxml2 2.9 + cdef int xmlKeepBlanksDefault(int val) + cdef xmlChar* xmlNodeGetBase(xmlDoc* doc, xmlNode* node) + cdef xmlDtd* xmlCreateIntSubset(xmlDoc* doc, const_xmlChar* name, + const_xmlChar* ExternalID, const_xmlChar* SystemID) + cdef void xmlNodeSetBase(xmlNode* node, const_xmlChar* uri) + cdef int xmlValidateNCName(const_xmlChar* value, int space) + +cdef extern from "libxml/uri.h" nogil: + cdef const_xmlChar* xmlBuildURI(const_xmlChar* href, const_xmlChar* base) + +cdef extern from "libxml/HTMLtree.h" nogil: + cdef void htmlNodeDumpFormatOutput(xmlOutputBuffer* buf, + xmlDoc* doc, xmlNode* cur, + char* encoding, int format) + cdef xmlDoc* htmlNewDoc(const_xmlChar* uri, const_xmlChar* externalID) + +cdef extern from "libxml/valid.h" nogil: + cdef xmlAttr* xmlGetID(xmlDoc* doc, const_xmlChar* ID) + cdef void xmlDumpNotationTable(xmlBuffer* buffer, + xmlNotationTable* table) + cdef int xmlValidateNameValue(const_xmlChar* value) + +cdef extern from "libxml/xmlIO.h": + cdef int xmlOutputBufferWrite(xmlOutputBuffer* out, + int len, const_char* str) nogil + cdef int xmlOutputBufferWriteString(xmlOutputBuffer* out, const_char* str) nogil + cdef int xmlOutputBufferWriteEscape(xmlOutputBuffer* out, + const_xmlChar* str, + xmlCharEncodingOutputFunc escapefunc) nogil + cdef int xmlOutputBufferFlush(xmlOutputBuffer* out) nogil + cdef int xmlOutputBufferClose(xmlOutputBuffer* out) nogil + + ctypedef int (*xmlInputReadCallback)(void* context, + char* buffer, int len) noexcept nogil + ctypedef int (*xmlInputCloseCallback)(void* context) noexcept nogil + + ctypedef int (*xmlOutputWriteCallback)(void* context, + char* buffer, int len) noexcept + ctypedef int (*xmlOutputCloseCallback)(void* context) noexcept + + cdef xmlOutputBuffer* xmlAllocOutputBuffer( + xmlCharEncodingHandler* encoder) nogil + cdef xmlOutputBuffer* xmlOutputBufferCreateIO( + xmlOutputWriteCallback iowrite, + xmlOutputCloseCallback ioclose, + void * ioctx, + xmlCharEncodingHandler* encoder) nogil + cdef xmlOutputBuffer* xmlOutputBufferCreateFile( + stdio.FILE* file, xmlCharEncodingHandler* encoder) nogil + cdef xmlOutputBuffer* xmlOutputBufferCreateFilename( + char* URI, xmlCharEncodingHandler* encoder, int compression) nogil + +cdef extern from "libxml/xmlsave.h" nogil: + ctypedef struct xmlSaveCtxt + + ctypedef enum xmlSaveOption: + XML_SAVE_FORMAT = 1 # format save output (2.6.17) + XML_SAVE_NO_DECL = 2 # drop the xml declaration (2.6.21) + XML_SAVE_NO_EMPTY = 4 # no empty tags (2.6.22) + XML_SAVE_NO_XHTML = 8 # disable XHTML1 specific rules (2.6.22) + XML_SAVE_XHTML = 16 # force XHTML1 specific rules (2.7.2) + XML_SAVE_AS_XML = 32 # force XML serialization on HTML doc (2.7.2) + XML_SAVE_AS_HTML = 64 # force HTML serialization on XML doc (2.7.2) + + cdef xmlSaveCtxt* xmlSaveToFilename(char* filename, char* encoding, + int options) + cdef xmlSaveCtxt* xmlSaveToBuffer(xmlBuffer* buffer, char* encoding, + int options) # libxml2 2.6.23 + cdef long xmlSaveDoc(xmlSaveCtxt* ctxt, xmlDoc* doc) + cdef long xmlSaveTree(xmlSaveCtxt* ctxt, xmlNode* node) + cdef int xmlSaveClose(xmlSaveCtxt* ctxt) + cdef int xmlSaveFlush(xmlSaveCtxt* ctxt) + cdef int xmlSaveSetAttrEscape(xmlSaveCtxt* ctxt, void* escape_func) + cdef int xmlSaveSetEscape(xmlSaveCtxt* ctxt, void* escape_func) + +cdef extern from "libxml/globals.h" nogil: + cdef int xmlThrDefKeepBlanksDefaultValue(int onoff) + cdef int xmlThrDefLineNumbersDefaultValue(int onoff) + cdef int xmlThrDefIndentTreeOutput(int onoff) + +cdef extern from "libxml/xmlmemory.h" nogil: + cdef void* xmlMalloc(size_t size) + cdef int xmlMemBlocks() + cdef int xmlMemUsed() + cdef void xmlMemDisplay(stdio.FILE* file) + cdef void xmlMemDisplayLast(stdio.FILE* file, long num_bytes) + cdef void xmlMemShow(stdio.FILE* file, int count) + +cdef extern from "etree_defs.h" nogil: + cdef bint _isElement(xmlNode* node) + cdef bint _isElementOrXInclude(xmlNode* node) + cdef const_xmlChar* _getNs(xmlNode* node) + cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* tree_top, + xmlNode* start_node, + bint inclusive) + cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* start_node) + cdef void BEGIN_FOR_EACH_FROM(xmlNode* tree_top, + xmlNode* start_node, + bint inclusive) + cdef void END_FOR_EACH_FROM(xmlNode* start_node) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/uri.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/uri.pxd new file mode 100644 index 00000000..f886a54b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/uri.pxd @@ -0,0 +1,5 @@ +cdef extern from "libxml/uri.h" nogil: + ctypedef struct xmlURI + + cdef xmlURI* xmlParseURI(char* str) + cdef void xmlFreeURI(xmlURI* uri) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/xinclude.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/xinclude.pxd new file mode 100644 index 00000000..68267175 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/xinclude.pxd @@ -0,0 +1,22 @@ +from lxml.includes.tree cimport xmlDoc, xmlNode + +cdef extern from "libxml/xinclude.h" nogil: + + ctypedef struct xmlXIncludeCtxt + + cdef int xmlXIncludeProcess(xmlDoc* doc) + cdef int xmlXIncludeProcessFlags(xmlDoc* doc, int parser_opts) + cdef int xmlXIncludeProcessTree(xmlNode* doc) + cdef int xmlXIncludeProcessTreeFlags(xmlNode* doc, int parser_opts) + + # libxml2 >= 2.7.4 + cdef int xmlXIncludeProcessTreeFlagsData( + xmlNode* doc, int parser_opts, void* data) + + cdef xmlXIncludeCtxt* xmlXIncludeNewContext(xmlDoc* doc) + cdef int xmlXIncludeProcessNode(xmlXIncludeCtxt* ctxt, xmlNode* node) + cdef int xmlXIncludeSetFlags(xmlXIncludeCtxt* ctxt, int flags) + + # libxml2 >= 2.6.27 + cdef int xmlXIncludeProcessFlagsData( + xmlDoc* doc, int flags, void* data) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/xmlerror.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/xmlerror.pxd new file mode 100644 index 00000000..589e38ea --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/xmlerror.pxd @@ -0,0 +1,852 @@ + +# --- BEGIN: GENERATED CONSTANTS --- + +# This section is generated by the script 'update-error-constants.py'. + +cdef extern from "libxml/xmlerror.h": + ctypedef enum xmlErrorLevel: + XML_ERR_NONE = 0 + XML_ERR_WARNING = 1 # A simple warning + XML_ERR_ERROR = 2 # A recoverable error + XML_ERR_FATAL = 3 # A fatal error + + ctypedef enum xmlErrorDomain: + XML_FROM_NONE = 0 + XML_FROM_PARSER = 1 # The XML parser + XML_FROM_TREE = 2 # The tree module + XML_FROM_NAMESPACE = 3 # The XML Namespace module + XML_FROM_DTD = 4 # The XML DTD validation with parser contex + XML_FROM_HTML = 5 # The HTML parser + XML_FROM_MEMORY = 6 # The memory allocator + XML_FROM_OUTPUT = 7 # The serialization code + XML_FROM_IO = 8 # The Input/Output stack + XML_FROM_FTP = 9 # The FTP module + XML_FROM_HTTP = 10 # The HTTP module + XML_FROM_XINCLUDE = 11 # The XInclude processing + XML_FROM_XPATH = 12 # The XPath module + XML_FROM_XPOINTER = 13 # The XPointer module + XML_FROM_REGEXP = 14 # The regular expressions module + XML_FROM_DATATYPE = 15 # The W3C XML Schemas Datatype module + XML_FROM_SCHEMASP = 16 # The W3C XML Schemas parser module + XML_FROM_SCHEMASV = 17 # The W3C XML Schemas validation module + XML_FROM_RELAXNGP = 18 # The Relax-NG parser module + XML_FROM_RELAXNGV = 19 # The Relax-NG validator module + XML_FROM_CATALOG = 20 # The Catalog module + XML_FROM_C14N = 21 # The Canonicalization module + XML_FROM_XSLT = 22 # The XSLT engine from libxslt + XML_FROM_VALID = 23 # The XML DTD validation with valid context + XML_FROM_CHECK = 24 # The error checking module + XML_FROM_WRITER = 25 # The xmlwriter module + XML_FROM_MODULE = 26 # The dynamically loaded module modul + XML_FROM_I18N = 27 # The module handling character conversion + XML_FROM_SCHEMATRONV = 28 # The Schematron validator module + XML_FROM_BUFFER = 29 # The buffers module + XML_FROM_URI = 30 # The URI module + + ctypedef enum xmlParserErrors: + XML_ERR_OK = 0 + XML_ERR_INTERNAL_ERROR = 1 + XML_ERR_NO_MEMORY = 2 + XML_ERR_DOCUMENT_START = 3 + XML_ERR_DOCUMENT_EMPTY = 4 + XML_ERR_DOCUMENT_END = 5 + XML_ERR_INVALID_HEX_CHARREF = 6 + XML_ERR_INVALID_DEC_CHARREF = 7 + XML_ERR_INVALID_CHARREF = 8 + XML_ERR_INVALID_CHAR = 9 + XML_ERR_CHARREF_AT_EOF = 10 + XML_ERR_CHARREF_IN_PROLOG = 11 + XML_ERR_CHARREF_IN_EPILOG = 12 + XML_ERR_CHARREF_IN_DTD = 13 + XML_ERR_ENTITYREF_AT_EOF = 14 + XML_ERR_ENTITYREF_IN_PROLOG = 15 + XML_ERR_ENTITYREF_IN_EPILOG = 16 + XML_ERR_ENTITYREF_IN_DTD = 17 + XML_ERR_PEREF_AT_EOF = 18 + XML_ERR_PEREF_IN_PROLOG = 19 + XML_ERR_PEREF_IN_EPILOG = 20 + XML_ERR_PEREF_IN_INT_SUBSET = 21 + XML_ERR_ENTITYREF_NO_NAME = 22 + XML_ERR_ENTITYREF_SEMICOL_MISSING = 23 + XML_ERR_PEREF_NO_NAME = 24 + XML_ERR_PEREF_SEMICOL_MISSING = 25 + XML_ERR_UNDECLARED_ENTITY = 26 + XML_WAR_UNDECLARED_ENTITY = 27 + XML_ERR_UNPARSED_ENTITY = 28 + XML_ERR_ENTITY_IS_EXTERNAL = 29 + XML_ERR_ENTITY_IS_PARAMETER = 30 + XML_ERR_UNKNOWN_ENCODING = 31 + XML_ERR_UNSUPPORTED_ENCODING = 32 + XML_ERR_STRING_NOT_STARTED = 33 + XML_ERR_STRING_NOT_CLOSED = 34 + XML_ERR_NS_DECL_ERROR = 35 + XML_ERR_ENTITY_NOT_STARTED = 36 + XML_ERR_ENTITY_NOT_FINISHED = 37 + XML_ERR_LT_IN_ATTRIBUTE = 38 + XML_ERR_ATTRIBUTE_NOT_STARTED = 39 + XML_ERR_ATTRIBUTE_NOT_FINISHED = 40 + XML_ERR_ATTRIBUTE_WITHOUT_VALUE = 41 + XML_ERR_ATTRIBUTE_REDEFINED = 42 + XML_ERR_LITERAL_NOT_STARTED = 43 + XML_ERR_LITERAL_NOT_FINISHED = 44 + XML_ERR_COMMENT_NOT_FINISHED = 45 + XML_ERR_PI_NOT_STARTED = 46 + XML_ERR_PI_NOT_FINISHED = 47 + XML_ERR_NOTATION_NOT_STARTED = 48 + XML_ERR_NOTATION_NOT_FINISHED = 49 + XML_ERR_ATTLIST_NOT_STARTED = 50 + XML_ERR_ATTLIST_NOT_FINISHED = 51 + XML_ERR_MIXED_NOT_STARTED = 52 + XML_ERR_MIXED_NOT_FINISHED = 53 + XML_ERR_ELEMCONTENT_NOT_STARTED = 54 + XML_ERR_ELEMCONTENT_NOT_FINISHED = 55 + XML_ERR_XMLDECL_NOT_STARTED = 56 + XML_ERR_XMLDECL_NOT_FINISHED = 57 + XML_ERR_CONDSEC_NOT_STARTED = 58 + XML_ERR_CONDSEC_NOT_FINISHED = 59 + XML_ERR_EXT_SUBSET_NOT_FINISHED = 60 + XML_ERR_DOCTYPE_NOT_FINISHED = 61 + XML_ERR_MISPLACED_CDATA_END = 62 + XML_ERR_CDATA_NOT_FINISHED = 63 + XML_ERR_RESERVED_XML_NAME = 64 + XML_ERR_SPACE_REQUIRED = 65 + XML_ERR_SEPARATOR_REQUIRED = 66 + XML_ERR_NMTOKEN_REQUIRED = 67 + XML_ERR_NAME_REQUIRED = 68 + XML_ERR_PCDATA_REQUIRED = 69 + XML_ERR_URI_REQUIRED = 70 + XML_ERR_PUBID_REQUIRED = 71 + XML_ERR_LT_REQUIRED = 72 + XML_ERR_GT_REQUIRED = 73 + XML_ERR_LTSLASH_REQUIRED = 74 + XML_ERR_EQUAL_REQUIRED = 75 + XML_ERR_TAG_NAME_MISMATCH = 76 + XML_ERR_TAG_NOT_FINISHED = 77 + XML_ERR_STANDALONE_VALUE = 78 + XML_ERR_ENCODING_NAME = 79 + XML_ERR_HYPHEN_IN_COMMENT = 80 + XML_ERR_INVALID_ENCODING = 81 + XML_ERR_EXT_ENTITY_STANDALONE = 82 + XML_ERR_CONDSEC_INVALID = 83 + XML_ERR_VALUE_REQUIRED = 84 + XML_ERR_NOT_WELL_BALANCED = 85 + XML_ERR_EXTRA_CONTENT = 86 + XML_ERR_ENTITY_CHAR_ERROR = 87 + XML_ERR_ENTITY_PE_INTERNAL = 88 + XML_ERR_ENTITY_LOOP = 89 + XML_ERR_ENTITY_BOUNDARY = 90 + XML_ERR_INVALID_URI = 91 + XML_ERR_URI_FRAGMENT = 92 + XML_WAR_CATALOG_PI = 93 + XML_ERR_NO_DTD = 94 + XML_ERR_CONDSEC_INVALID_KEYWORD = 95 + XML_ERR_VERSION_MISSING = 96 + XML_WAR_UNKNOWN_VERSION = 97 + XML_WAR_LANG_VALUE = 98 + XML_WAR_NS_URI = 99 + XML_WAR_NS_URI_RELATIVE = 100 + XML_ERR_MISSING_ENCODING = 101 + XML_WAR_SPACE_VALUE = 102 + XML_ERR_NOT_STANDALONE = 103 + XML_ERR_ENTITY_PROCESSING = 104 + XML_ERR_NOTATION_PROCESSING = 105 + XML_WAR_NS_COLUMN = 106 + XML_WAR_ENTITY_REDEFINED = 107 + XML_ERR_UNKNOWN_VERSION = 108 + XML_ERR_VERSION_MISMATCH = 109 + XML_ERR_NAME_TOO_LONG = 110 + XML_ERR_USER_STOP = 111 + XML_ERR_COMMENT_ABRUPTLY_ENDED = 112 + XML_NS_ERR_XML_NAMESPACE = 200 + XML_NS_ERR_UNDEFINED_NAMESPACE = 201 + XML_NS_ERR_QNAME = 202 + XML_NS_ERR_ATTRIBUTE_REDEFINED = 203 + XML_NS_ERR_EMPTY = 204 + XML_NS_ERR_COLON = 205 + XML_DTD_ATTRIBUTE_DEFAULT = 500 + XML_DTD_ATTRIBUTE_REDEFINED = 501 + XML_DTD_ATTRIBUTE_VALUE = 502 + XML_DTD_CONTENT_ERROR = 503 + XML_DTD_CONTENT_MODEL = 504 + XML_DTD_CONTENT_NOT_DETERMINIST = 505 + XML_DTD_DIFFERENT_PREFIX = 506 + XML_DTD_ELEM_DEFAULT_NAMESPACE = 507 + XML_DTD_ELEM_NAMESPACE = 508 + XML_DTD_ELEM_REDEFINED = 509 + XML_DTD_EMPTY_NOTATION = 510 + XML_DTD_ENTITY_TYPE = 511 + XML_DTD_ID_FIXED = 512 + XML_DTD_ID_REDEFINED = 513 + XML_DTD_ID_SUBSET = 514 + XML_DTD_INVALID_CHILD = 515 + XML_DTD_INVALID_DEFAULT = 516 + XML_DTD_LOAD_ERROR = 517 + XML_DTD_MISSING_ATTRIBUTE = 518 + XML_DTD_MIXED_CORRUPT = 519 + XML_DTD_MULTIPLE_ID = 520 + XML_DTD_NO_DOC = 521 + XML_DTD_NO_DTD = 522 + XML_DTD_NO_ELEM_NAME = 523 + XML_DTD_NO_PREFIX = 524 + XML_DTD_NO_ROOT = 525 + XML_DTD_NOTATION_REDEFINED = 526 + XML_DTD_NOTATION_VALUE = 527 + XML_DTD_NOT_EMPTY = 528 + XML_DTD_NOT_PCDATA = 529 + XML_DTD_NOT_STANDALONE = 530 + XML_DTD_ROOT_NAME = 531 + XML_DTD_STANDALONE_WHITE_SPACE = 532 + XML_DTD_UNKNOWN_ATTRIBUTE = 533 + XML_DTD_UNKNOWN_ELEM = 534 + XML_DTD_UNKNOWN_ENTITY = 535 + XML_DTD_UNKNOWN_ID = 536 + XML_DTD_UNKNOWN_NOTATION = 537 + XML_DTD_STANDALONE_DEFAULTED = 538 + XML_DTD_XMLID_VALUE = 539 + XML_DTD_XMLID_TYPE = 540 + XML_DTD_DUP_TOKEN = 541 + XML_HTML_STRUCURE_ERROR = 800 + XML_HTML_UNKNOWN_TAG = 801 + XML_RNGP_ANYNAME_ATTR_ANCESTOR = 1000 + XML_RNGP_ATTR_CONFLICT = 1001 + XML_RNGP_ATTRIBUTE_CHILDREN = 1002 + XML_RNGP_ATTRIBUTE_CONTENT = 1003 + XML_RNGP_ATTRIBUTE_EMPTY = 1004 + XML_RNGP_ATTRIBUTE_NOOP = 1005 + XML_RNGP_CHOICE_CONTENT = 1006 + XML_RNGP_CHOICE_EMPTY = 1007 + XML_RNGP_CREATE_FAILURE = 1008 + XML_RNGP_DATA_CONTENT = 1009 + XML_RNGP_DEF_CHOICE_AND_INTERLEAVE = 1010 + XML_RNGP_DEFINE_CREATE_FAILED = 1011 + XML_RNGP_DEFINE_EMPTY = 1012 + XML_RNGP_DEFINE_MISSING = 1013 + XML_RNGP_DEFINE_NAME_MISSING = 1014 + XML_RNGP_ELEM_CONTENT_EMPTY = 1015 + XML_RNGP_ELEM_CONTENT_ERROR = 1016 + XML_RNGP_ELEMENT_EMPTY = 1017 + XML_RNGP_ELEMENT_CONTENT = 1018 + XML_RNGP_ELEMENT_NAME = 1019 + XML_RNGP_ELEMENT_NO_CONTENT = 1020 + XML_RNGP_ELEM_TEXT_CONFLICT = 1021 + XML_RNGP_EMPTY = 1022 + XML_RNGP_EMPTY_CONSTRUCT = 1023 + XML_RNGP_EMPTY_CONTENT = 1024 + XML_RNGP_EMPTY_NOT_EMPTY = 1025 + XML_RNGP_ERROR_TYPE_LIB = 1026 + XML_RNGP_EXCEPT_EMPTY = 1027 + XML_RNGP_EXCEPT_MISSING = 1028 + XML_RNGP_EXCEPT_MULTIPLE = 1029 + XML_RNGP_EXCEPT_NO_CONTENT = 1030 + XML_RNGP_EXTERNALREF_EMTPY = 1031 + XML_RNGP_EXTERNAL_REF_FAILURE = 1032 + XML_RNGP_EXTERNALREF_RECURSE = 1033 + XML_RNGP_FORBIDDEN_ATTRIBUTE = 1034 + XML_RNGP_FOREIGN_ELEMENT = 1035 + XML_RNGP_GRAMMAR_CONTENT = 1036 + XML_RNGP_GRAMMAR_EMPTY = 1037 + XML_RNGP_GRAMMAR_MISSING = 1038 + XML_RNGP_GRAMMAR_NO_START = 1039 + XML_RNGP_GROUP_ATTR_CONFLICT = 1040 + XML_RNGP_HREF_ERROR = 1041 + XML_RNGP_INCLUDE_EMPTY = 1042 + XML_RNGP_INCLUDE_FAILURE = 1043 + XML_RNGP_INCLUDE_RECURSE = 1044 + XML_RNGP_INTERLEAVE_ADD = 1045 + XML_RNGP_INTERLEAVE_CREATE_FAILED = 1046 + XML_RNGP_INTERLEAVE_EMPTY = 1047 + XML_RNGP_INTERLEAVE_NO_CONTENT = 1048 + XML_RNGP_INVALID_DEFINE_NAME = 1049 + XML_RNGP_INVALID_URI = 1050 + XML_RNGP_INVALID_VALUE = 1051 + XML_RNGP_MISSING_HREF = 1052 + XML_RNGP_NAME_MISSING = 1053 + XML_RNGP_NEED_COMBINE = 1054 + XML_RNGP_NOTALLOWED_NOT_EMPTY = 1055 + XML_RNGP_NSNAME_ATTR_ANCESTOR = 1056 + XML_RNGP_NSNAME_NO_NS = 1057 + XML_RNGP_PARAM_FORBIDDEN = 1058 + XML_RNGP_PARAM_NAME_MISSING = 1059 + XML_RNGP_PARENTREF_CREATE_FAILED = 1060 + XML_RNGP_PARENTREF_NAME_INVALID = 1061 + XML_RNGP_PARENTREF_NO_NAME = 1062 + XML_RNGP_PARENTREF_NO_PARENT = 1063 + XML_RNGP_PARENTREF_NOT_EMPTY = 1064 + XML_RNGP_PARSE_ERROR = 1065 + XML_RNGP_PAT_ANYNAME_EXCEPT_ANYNAME = 1066 + XML_RNGP_PAT_ATTR_ATTR = 1067 + XML_RNGP_PAT_ATTR_ELEM = 1068 + XML_RNGP_PAT_DATA_EXCEPT_ATTR = 1069 + XML_RNGP_PAT_DATA_EXCEPT_ELEM = 1070 + XML_RNGP_PAT_DATA_EXCEPT_EMPTY = 1071 + XML_RNGP_PAT_DATA_EXCEPT_GROUP = 1072 + XML_RNGP_PAT_DATA_EXCEPT_INTERLEAVE = 1073 + XML_RNGP_PAT_DATA_EXCEPT_LIST = 1074 + XML_RNGP_PAT_DATA_EXCEPT_ONEMORE = 1075 + XML_RNGP_PAT_DATA_EXCEPT_REF = 1076 + XML_RNGP_PAT_DATA_EXCEPT_TEXT = 1077 + XML_RNGP_PAT_LIST_ATTR = 1078 + XML_RNGP_PAT_LIST_ELEM = 1079 + XML_RNGP_PAT_LIST_INTERLEAVE = 1080 + XML_RNGP_PAT_LIST_LIST = 1081 + XML_RNGP_PAT_LIST_REF = 1082 + XML_RNGP_PAT_LIST_TEXT = 1083 + XML_RNGP_PAT_NSNAME_EXCEPT_ANYNAME = 1084 + XML_RNGP_PAT_NSNAME_EXCEPT_NSNAME = 1085 + XML_RNGP_PAT_ONEMORE_GROUP_ATTR = 1086 + XML_RNGP_PAT_ONEMORE_INTERLEAVE_ATTR = 1087 + XML_RNGP_PAT_START_ATTR = 1088 + XML_RNGP_PAT_START_DATA = 1089 + XML_RNGP_PAT_START_EMPTY = 1090 + XML_RNGP_PAT_START_GROUP = 1091 + XML_RNGP_PAT_START_INTERLEAVE = 1092 + XML_RNGP_PAT_START_LIST = 1093 + XML_RNGP_PAT_START_ONEMORE = 1094 + XML_RNGP_PAT_START_TEXT = 1095 + XML_RNGP_PAT_START_VALUE = 1096 + XML_RNGP_PREFIX_UNDEFINED = 1097 + XML_RNGP_REF_CREATE_FAILED = 1098 + XML_RNGP_REF_CYCLE = 1099 + XML_RNGP_REF_NAME_INVALID = 1100 + XML_RNGP_REF_NO_DEF = 1101 + XML_RNGP_REF_NO_NAME = 1102 + XML_RNGP_REF_NOT_EMPTY = 1103 + XML_RNGP_START_CHOICE_AND_INTERLEAVE = 1104 + XML_RNGP_START_CONTENT = 1105 + XML_RNGP_START_EMPTY = 1106 + XML_RNGP_START_MISSING = 1107 + XML_RNGP_TEXT_EXPECTED = 1108 + XML_RNGP_TEXT_HAS_CHILD = 1109 + XML_RNGP_TYPE_MISSING = 1110 + XML_RNGP_TYPE_NOT_FOUND = 1111 + XML_RNGP_TYPE_VALUE = 1112 + XML_RNGP_UNKNOWN_ATTRIBUTE = 1113 + XML_RNGP_UNKNOWN_COMBINE = 1114 + XML_RNGP_UNKNOWN_CONSTRUCT = 1115 + XML_RNGP_UNKNOWN_TYPE_LIB = 1116 + XML_RNGP_URI_FRAGMENT = 1117 + XML_RNGP_URI_NOT_ABSOLUTE = 1118 + XML_RNGP_VALUE_EMPTY = 1119 + XML_RNGP_VALUE_NO_CONTENT = 1120 + XML_RNGP_XMLNS_NAME = 1121 + XML_RNGP_XML_NS = 1122 + XML_XPATH_EXPRESSION_OK = 1200 + XML_XPATH_NUMBER_ERROR = 1201 + XML_XPATH_UNFINISHED_LITERAL_ERROR = 1202 + XML_XPATH_START_LITERAL_ERROR = 1203 + XML_XPATH_VARIABLE_REF_ERROR = 1204 + XML_XPATH_UNDEF_VARIABLE_ERROR = 1205 + XML_XPATH_INVALID_PREDICATE_ERROR = 1206 + XML_XPATH_EXPR_ERROR = 1207 + XML_XPATH_UNCLOSED_ERROR = 1208 + XML_XPATH_UNKNOWN_FUNC_ERROR = 1209 + XML_XPATH_INVALID_OPERAND = 1210 + XML_XPATH_INVALID_TYPE = 1211 + XML_XPATH_INVALID_ARITY = 1212 + XML_XPATH_INVALID_CTXT_SIZE = 1213 + XML_XPATH_INVALID_CTXT_POSITION = 1214 + XML_XPATH_MEMORY_ERROR = 1215 + XML_XPTR_SYNTAX_ERROR = 1216 + XML_XPTR_RESOURCE_ERROR = 1217 + XML_XPTR_SUB_RESOURCE_ERROR = 1218 + XML_XPATH_UNDEF_PREFIX_ERROR = 1219 + XML_XPATH_ENCODING_ERROR = 1220 + XML_XPATH_INVALID_CHAR_ERROR = 1221 + XML_TREE_INVALID_HEX = 1300 + XML_TREE_INVALID_DEC = 1301 + XML_TREE_UNTERMINATED_ENTITY = 1302 + XML_TREE_NOT_UTF8 = 1303 + XML_SAVE_NOT_UTF8 = 1400 + XML_SAVE_CHAR_INVALID = 1401 + XML_SAVE_NO_DOCTYPE = 1402 + XML_SAVE_UNKNOWN_ENCODING = 1403 + XML_REGEXP_COMPILE_ERROR = 1450 + XML_IO_UNKNOWN = 1500 + XML_IO_EACCES = 1501 + XML_IO_EAGAIN = 1502 + XML_IO_EBADF = 1503 + XML_IO_EBADMSG = 1504 + XML_IO_EBUSY = 1505 + XML_IO_ECANCELED = 1506 + XML_IO_ECHILD = 1507 + XML_IO_EDEADLK = 1508 + XML_IO_EDOM = 1509 + XML_IO_EEXIST = 1510 + XML_IO_EFAULT = 1511 + XML_IO_EFBIG = 1512 + XML_IO_EINPROGRESS = 1513 + XML_IO_EINTR = 1514 + XML_IO_EINVAL = 1515 + XML_IO_EIO = 1516 + XML_IO_EISDIR = 1517 + XML_IO_EMFILE = 1518 + XML_IO_EMLINK = 1519 + XML_IO_EMSGSIZE = 1520 + XML_IO_ENAMETOOLONG = 1521 + XML_IO_ENFILE = 1522 + XML_IO_ENODEV = 1523 + XML_IO_ENOENT = 1524 + XML_IO_ENOEXEC = 1525 + XML_IO_ENOLCK = 1526 + XML_IO_ENOMEM = 1527 + XML_IO_ENOSPC = 1528 + XML_IO_ENOSYS = 1529 + XML_IO_ENOTDIR = 1530 + XML_IO_ENOTEMPTY = 1531 + XML_IO_ENOTSUP = 1532 + XML_IO_ENOTTY = 1533 + XML_IO_ENXIO = 1534 + XML_IO_EPERM = 1535 + XML_IO_EPIPE = 1536 + XML_IO_ERANGE = 1537 + XML_IO_EROFS = 1538 + XML_IO_ESPIPE = 1539 + XML_IO_ESRCH = 1540 + XML_IO_ETIMEDOUT = 1541 + XML_IO_EXDEV = 1542 + XML_IO_NETWORK_ATTEMPT = 1543 + XML_IO_ENCODER = 1544 + XML_IO_FLUSH = 1545 + XML_IO_WRITE = 1546 + XML_IO_NO_INPUT = 1547 + XML_IO_BUFFER_FULL = 1548 + XML_IO_LOAD_ERROR = 1549 + XML_IO_ENOTSOCK = 1550 + XML_IO_EISCONN = 1551 + XML_IO_ECONNREFUSED = 1552 + XML_IO_ENETUNREACH = 1553 + XML_IO_EADDRINUSE = 1554 + XML_IO_EALREADY = 1555 + XML_IO_EAFNOSUPPORT = 1556 + XML_XINCLUDE_RECURSION = 1600 + XML_XINCLUDE_PARSE_VALUE = 1601 + XML_XINCLUDE_ENTITY_DEF_MISMATCH = 1602 + XML_XINCLUDE_NO_HREF = 1603 + XML_XINCLUDE_NO_FALLBACK = 1604 + XML_XINCLUDE_HREF_URI = 1605 + XML_XINCLUDE_TEXT_FRAGMENT = 1606 + XML_XINCLUDE_TEXT_DOCUMENT = 1607 + XML_XINCLUDE_INVALID_CHAR = 1608 + XML_XINCLUDE_BUILD_FAILED = 1609 + XML_XINCLUDE_UNKNOWN_ENCODING = 1610 + XML_XINCLUDE_MULTIPLE_ROOT = 1611 + XML_XINCLUDE_XPTR_FAILED = 1612 + XML_XINCLUDE_XPTR_RESULT = 1613 + XML_XINCLUDE_INCLUDE_IN_INCLUDE = 1614 + XML_XINCLUDE_FALLBACKS_IN_INCLUDE = 1615 + XML_XINCLUDE_FALLBACK_NOT_IN_INCLUDE = 1616 + XML_XINCLUDE_DEPRECATED_NS = 1617 + XML_XINCLUDE_FRAGMENT_ID = 1618 + XML_CATALOG_MISSING_ATTR = 1650 + XML_CATALOG_ENTRY_BROKEN = 1651 + XML_CATALOG_PREFER_VALUE = 1652 + XML_CATALOG_NOT_CATALOG = 1653 + XML_CATALOG_RECURSION = 1654 + XML_SCHEMAP_PREFIX_UNDEFINED = 1700 + XML_SCHEMAP_ATTRFORMDEFAULT_VALUE = 1701 + XML_SCHEMAP_ATTRGRP_NONAME_NOREF = 1702 + XML_SCHEMAP_ATTR_NONAME_NOREF = 1703 + XML_SCHEMAP_COMPLEXTYPE_NONAME_NOREF = 1704 + XML_SCHEMAP_ELEMFORMDEFAULT_VALUE = 1705 + XML_SCHEMAP_ELEM_NONAME_NOREF = 1706 + XML_SCHEMAP_EXTENSION_NO_BASE = 1707 + XML_SCHEMAP_FACET_NO_VALUE = 1708 + XML_SCHEMAP_FAILED_BUILD_IMPORT = 1709 + XML_SCHEMAP_GROUP_NONAME_NOREF = 1710 + XML_SCHEMAP_IMPORT_NAMESPACE_NOT_URI = 1711 + XML_SCHEMAP_IMPORT_REDEFINE_NSNAME = 1712 + XML_SCHEMAP_IMPORT_SCHEMA_NOT_URI = 1713 + XML_SCHEMAP_INVALID_BOOLEAN = 1714 + XML_SCHEMAP_INVALID_ENUM = 1715 + XML_SCHEMAP_INVALID_FACET = 1716 + XML_SCHEMAP_INVALID_FACET_VALUE = 1717 + XML_SCHEMAP_INVALID_MAXOCCURS = 1718 + XML_SCHEMAP_INVALID_MINOCCURS = 1719 + XML_SCHEMAP_INVALID_REF_AND_SUBTYPE = 1720 + XML_SCHEMAP_INVALID_WHITE_SPACE = 1721 + XML_SCHEMAP_NOATTR_NOREF = 1722 + XML_SCHEMAP_NOTATION_NO_NAME = 1723 + XML_SCHEMAP_NOTYPE_NOREF = 1724 + XML_SCHEMAP_REF_AND_SUBTYPE = 1725 + XML_SCHEMAP_RESTRICTION_NONAME_NOREF = 1726 + XML_SCHEMAP_SIMPLETYPE_NONAME = 1727 + XML_SCHEMAP_TYPE_AND_SUBTYPE = 1728 + XML_SCHEMAP_UNKNOWN_ALL_CHILD = 1729 + XML_SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD = 1730 + XML_SCHEMAP_UNKNOWN_ATTR_CHILD = 1731 + XML_SCHEMAP_UNKNOWN_ATTRGRP_CHILD = 1732 + XML_SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP = 1733 + XML_SCHEMAP_UNKNOWN_BASE_TYPE = 1734 + XML_SCHEMAP_UNKNOWN_CHOICE_CHILD = 1735 + XML_SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD = 1736 + XML_SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD = 1737 + XML_SCHEMAP_UNKNOWN_ELEM_CHILD = 1738 + XML_SCHEMAP_UNKNOWN_EXTENSION_CHILD = 1739 + XML_SCHEMAP_UNKNOWN_FACET_CHILD = 1740 + XML_SCHEMAP_UNKNOWN_FACET_TYPE = 1741 + XML_SCHEMAP_UNKNOWN_GROUP_CHILD = 1742 + XML_SCHEMAP_UNKNOWN_IMPORT_CHILD = 1743 + XML_SCHEMAP_UNKNOWN_LIST_CHILD = 1744 + XML_SCHEMAP_UNKNOWN_NOTATION_CHILD = 1745 + XML_SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD = 1746 + XML_SCHEMAP_UNKNOWN_REF = 1747 + XML_SCHEMAP_UNKNOWN_RESTRICTION_CHILD = 1748 + XML_SCHEMAP_UNKNOWN_SCHEMAS_CHILD = 1749 + XML_SCHEMAP_UNKNOWN_SEQUENCE_CHILD = 1750 + XML_SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD = 1751 + XML_SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD = 1752 + XML_SCHEMAP_UNKNOWN_TYPE = 1753 + XML_SCHEMAP_UNKNOWN_UNION_CHILD = 1754 + XML_SCHEMAP_ELEM_DEFAULT_FIXED = 1755 + XML_SCHEMAP_REGEXP_INVALID = 1756 + XML_SCHEMAP_FAILED_LOAD = 1757 + XML_SCHEMAP_NOTHING_TO_PARSE = 1758 + XML_SCHEMAP_NOROOT = 1759 + XML_SCHEMAP_REDEFINED_GROUP = 1760 + XML_SCHEMAP_REDEFINED_TYPE = 1761 + XML_SCHEMAP_REDEFINED_ELEMENT = 1762 + XML_SCHEMAP_REDEFINED_ATTRGROUP = 1763 + XML_SCHEMAP_REDEFINED_ATTR = 1764 + XML_SCHEMAP_REDEFINED_NOTATION = 1765 + XML_SCHEMAP_FAILED_PARSE = 1766 + XML_SCHEMAP_UNKNOWN_PREFIX = 1767 + XML_SCHEMAP_DEF_AND_PREFIX = 1768 + XML_SCHEMAP_UNKNOWN_INCLUDE_CHILD = 1769 + XML_SCHEMAP_INCLUDE_SCHEMA_NOT_URI = 1770 + XML_SCHEMAP_INCLUDE_SCHEMA_NO_URI = 1771 + XML_SCHEMAP_NOT_SCHEMA = 1772 + XML_SCHEMAP_UNKNOWN_MEMBER_TYPE = 1773 + XML_SCHEMAP_INVALID_ATTR_USE = 1774 + XML_SCHEMAP_RECURSIVE = 1775 + XML_SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE = 1776 + XML_SCHEMAP_INVALID_ATTR_COMBINATION = 1777 + XML_SCHEMAP_INVALID_ATTR_INLINE_COMBINATION = 1778 + XML_SCHEMAP_MISSING_SIMPLETYPE_CHILD = 1779 + XML_SCHEMAP_INVALID_ATTR_NAME = 1780 + XML_SCHEMAP_REF_AND_CONTENT = 1781 + XML_SCHEMAP_CT_PROPS_CORRECT_1 = 1782 + XML_SCHEMAP_CT_PROPS_CORRECT_2 = 1783 + XML_SCHEMAP_CT_PROPS_CORRECT_3 = 1784 + XML_SCHEMAP_CT_PROPS_CORRECT_4 = 1785 + XML_SCHEMAP_CT_PROPS_CORRECT_5 = 1786 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_1 = 1787 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1 = 1788 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2 = 1789 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_2 = 1790 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_3 = 1791 + XML_SCHEMAP_WILDCARD_INVALID_NS_MEMBER = 1792 + XML_SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE = 1793 + XML_SCHEMAP_UNION_NOT_EXPRESSIBLE = 1794 + XML_SCHEMAP_SRC_IMPORT_3_1 = 1795 + XML_SCHEMAP_SRC_IMPORT_3_2 = 1796 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_1 = 1797 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_2 = 1798 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_4_3 = 1799 + XML_SCHEMAP_COS_CT_EXTENDS_1_3 = 1800 + XML_SCHEMAV_NOROOT = 1801 + XML_SCHEMAV_UNDECLAREDELEM = 1802 + XML_SCHEMAV_NOTTOPLEVEL = 1803 + XML_SCHEMAV_MISSING = 1804 + XML_SCHEMAV_WRONGELEM = 1805 + XML_SCHEMAV_NOTYPE = 1806 + XML_SCHEMAV_NOROLLBACK = 1807 + XML_SCHEMAV_ISABSTRACT = 1808 + XML_SCHEMAV_NOTEMPTY = 1809 + XML_SCHEMAV_ELEMCONT = 1810 + XML_SCHEMAV_HAVEDEFAULT = 1811 + XML_SCHEMAV_NOTNILLABLE = 1812 + XML_SCHEMAV_EXTRACONTENT = 1813 + XML_SCHEMAV_INVALIDATTR = 1814 + XML_SCHEMAV_INVALIDELEM = 1815 + XML_SCHEMAV_NOTDETERMINIST = 1816 + XML_SCHEMAV_CONSTRUCT = 1817 + XML_SCHEMAV_INTERNAL = 1818 + XML_SCHEMAV_NOTSIMPLE = 1819 + XML_SCHEMAV_ATTRUNKNOWN = 1820 + XML_SCHEMAV_ATTRINVALID = 1821 + XML_SCHEMAV_VALUE = 1822 + XML_SCHEMAV_FACET = 1823 + XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_1 = 1824 + XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_2 = 1825 + XML_SCHEMAV_CVC_DATATYPE_VALID_1_2_3 = 1826 + XML_SCHEMAV_CVC_TYPE_3_1_1 = 1827 + XML_SCHEMAV_CVC_TYPE_3_1_2 = 1828 + XML_SCHEMAV_CVC_FACET_VALID = 1829 + XML_SCHEMAV_CVC_LENGTH_VALID = 1830 + XML_SCHEMAV_CVC_MINLENGTH_VALID = 1831 + XML_SCHEMAV_CVC_MAXLENGTH_VALID = 1832 + XML_SCHEMAV_CVC_MININCLUSIVE_VALID = 1833 + XML_SCHEMAV_CVC_MAXINCLUSIVE_VALID = 1834 + XML_SCHEMAV_CVC_MINEXCLUSIVE_VALID = 1835 + XML_SCHEMAV_CVC_MAXEXCLUSIVE_VALID = 1836 + XML_SCHEMAV_CVC_TOTALDIGITS_VALID = 1837 + XML_SCHEMAV_CVC_FRACTIONDIGITS_VALID = 1838 + XML_SCHEMAV_CVC_PATTERN_VALID = 1839 + XML_SCHEMAV_CVC_ENUMERATION_VALID = 1840 + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_1 = 1841 + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_2 = 1842 + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_3 = 1843 + XML_SCHEMAV_CVC_COMPLEX_TYPE_2_4 = 1844 + XML_SCHEMAV_CVC_ELT_1 = 1845 + XML_SCHEMAV_CVC_ELT_2 = 1846 + XML_SCHEMAV_CVC_ELT_3_1 = 1847 + XML_SCHEMAV_CVC_ELT_3_2_1 = 1848 + XML_SCHEMAV_CVC_ELT_3_2_2 = 1849 + XML_SCHEMAV_CVC_ELT_4_1 = 1850 + XML_SCHEMAV_CVC_ELT_4_2 = 1851 + XML_SCHEMAV_CVC_ELT_4_3 = 1852 + XML_SCHEMAV_CVC_ELT_5_1_1 = 1853 + XML_SCHEMAV_CVC_ELT_5_1_2 = 1854 + XML_SCHEMAV_CVC_ELT_5_2_1 = 1855 + XML_SCHEMAV_CVC_ELT_5_2_2_1 = 1856 + XML_SCHEMAV_CVC_ELT_5_2_2_2_1 = 1857 + XML_SCHEMAV_CVC_ELT_5_2_2_2_2 = 1858 + XML_SCHEMAV_CVC_ELT_6 = 1859 + XML_SCHEMAV_CVC_ELT_7 = 1860 + XML_SCHEMAV_CVC_ATTRIBUTE_1 = 1861 + XML_SCHEMAV_CVC_ATTRIBUTE_2 = 1862 + XML_SCHEMAV_CVC_ATTRIBUTE_3 = 1863 + XML_SCHEMAV_CVC_ATTRIBUTE_4 = 1864 + XML_SCHEMAV_CVC_COMPLEX_TYPE_3_1 = 1865 + XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_1 = 1866 + XML_SCHEMAV_CVC_COMPLEX_TYPE_3_2_2 = 1867 + XML_SCHEMAV_CVC_COMPLEX_TYPE_4 = 1868 + XML_SCHEMAV_CVC_COMPLEX_TYPE_5_1 = 1869 + XML_SCHEMAV_CVC_COMPLEX_TYPE_5_2 = 1870 + XML_SCHEMAV_ELEMENT_CONTENT = 1871 + XML_SCHEMAV_DOCUMENT_ELEMENT_MISSING = 1872 + XML_SCHEMAV_CVC_COMPLEX_TYPE_1 = 1873 + XML_SCHEMAV_CVC_AU = 1874 + XML_SCHEMAV_CVC_TYPE_1 = 1875 + XML_SCHEMAV_CVC_TYPE_2 = 1876 + XML_SCHEMAV_CVC_IDC = 1877 + XML_SCHEMAV_CVC_WILDCARD = 1878 + XML_SCHEMAV_MISC = 1879 + XML_XPTR_UNKNOWN_SCHEME = 1900 + XML_XPTR_CHILDSEQ_START = 1901 + XML_XPTR_EVAL_FAILED = 1902 + XML_XPTR_EXTRA_OBJECTS = 1903 + XML_C14N_CREATE_CTXT = 1950 + XML_C14N_REQUIRES_UTF8 = 1951 + XML_C14N_CREATE_STACK = 1952 + XML_C14N_INVALID_NODE = 1953 + XML_C14N_UNKNOW_NODE = 1954 + XML_C14N_RELATIVE_NAMESPACE = 1955 + XML_FTP_PASV_ANSWER = 2000 + XML_FTP_EPSV_ANSWER = 2001 + XML_FTP_ACCNT = 2002 + XML_FTP_URL_SYNTAX = 2003 + XML_HTTP_URL_SYNTAX = 2020 + XML_HTTP_USE_IP = 2021 + XML_HTTP_UNKNOWN_HOST = 2022 + XML_SCHEMAP_SRC_SIMPLE_TYPE_1 = 3000 + XML_SCHEMAP_SRC_SIMPLE_TYPE_2 = 3001 + XML_SCHEMAP_SRC_SIMPLE_TYPE_3 = 3002 + XML_SCHEMAP_SRC_SIMPLE_TYPE_4 = 3003 + XML_SCHEMAP_SRC_RESOLVE = 3004 + XML_SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE = 3005 + XML_SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE = 3006 + XML_SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES = 3007 + XML_SCHEMAP_ST_PROPS_CORRECT_1 = 3008 + XML_SCHEMAP_ST_PROPS_CORRECT_2 = 3009 + XML_SCHEMAP_ST_PROPS_CORRECT_3 = 3010 + XML_SCHEMAP_COS_ST_RESTRICTS_1_1 = 3011 + XML_SCHEMAP_COS_ST_RESTRICTS_1_2 = 3012 + XML_SCHEMAP_COS_ST_RESTRICTS_1_3_1 = 3013 + XML_SCHEMAP_COS_ST_RESTRICTS_1_3_2 = 3014 + XML_SCHEMAP_COS_ST_RESTRICTS_2_1 = 3015 + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_1 = 3016 + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_1_2 = 3017 + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_1 = 3018 + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_2 = 3019 + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_3 = 3020 + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_4 = 3021 + XML_SCHEMAP_COS_ST_RESTRICTS_2_3_2_5 = 3022 + XML_SCHEMAP_COS_ST_RESTRICTS_3_1 = 3023 + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1 = 3024 + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_1_2 = 3025 + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_2 = 3026 + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_1 = 3027 + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_3 = 3028 + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_4 = 3029 + XML_SCHEMAP_COS_ST_RESTRICTS_3_3_2_5 = 3030 + XML_SCHEMAP_COS_ST_DERIVED_OK_2_1 = 3031 + XML_SCHEMAP_COS_ST_DERIVED_OK_2_2 = 3032 + XML_SCHEMAP_S4S_ELEM_NOT_ALLOWED = 3033 + XML_SCHEMAP_S4S_ELEM_MISSING = 3034 + XML_SCHEMAP_S4S_ATTR_NOT_ALLOWED = 3035 + XML_SCHEMAP_S4S_ATTR_MISSING = 3036 + XML_SCHEMAP_S4S_ATTR_INVALID_VALUE = 3037 + XML_SCHEMAP_SRC_ELEMENT_1 = 3038 + XML_SCHEMAP_SRC_ELEMENT_2_1 = 3039 + XML_SCHEMAP_SRC_ELEMENT_2_2 = 3040 + XML_SCHEMAP_SRC_ELEMENT_3 = 3041 + XML_SCHEMAP_P_PROPS_CORRECT_1 = 3042 + XML_SCHEMAP_P_PROPS_CORRECT_2_1 = 3043 + XML_SCHEMAP_P_PROPS_CORRECT_2_2 = 3044 + XML_SCHEMAP_E_PROPS_CORRECT_2 = 3045 + XML_SCHEMAP_E_PROPS_CORRECT_3 = 3046 + XML_SCHEMAP_E_PROPS_CORRECT_4 = 3047 + XML_SCHEMAP_E_PROPS_CORRECT_5 = 3048 + XML_SCHEMAP_E_PROPS_CORRECT_6 = 3049 + XML_SCHEMAP_SRC_INCLUDE = 3050 + XML_SCHEMAP_SRC_ATTRIBUTE_1 = 3051 + XML_SCHEMAP_SRC_ATTRIBUTE_2 = 3052 + XML_SCHEMAP_SRC_ATTRIBUTE_3_1 = 3053 + XML_SCHEMAP_SRC_ATTRIBUTE_3_2 = 3054 + XML_SCHEMAP_SRC_ATTRIBUTE_4 = 3055 + XML_SCHEMAP_NO_XMLNS = 3056 + XML_SCHEMAP_NO_XSI = 3057 + XML_SCHEMAP_COS_VALID_DEFAULT_1 = 3058 + XML_SCHEMAP_COS_VALID_DEFAULT_2_1 = 3059 + XML_SCHEMAP_COS_VALID_DEFAULT_2_2_1 = 3060 + XML_SCHEMAP_COS_VALID_DEFAULT_2_2_2 = 3061 + XML_SCHEMAP_CVC_SIMPLE_TYPE = 3062 + XML_SCHEMAP_COS_CT_EXTENDS_1_1 = 3063 + XML_SCHEMAP_SRC_IMPORT_1_1 = 3064 + XML_SCHEMAP_SRC_IMPORT_1_2 = 3065 + XML_SCHEMAP_SRC_IMPORT_2 = 3066 + XML_SCHEMAP_SRC_IMPORT_2_1 = 3067 + XML_SCHEMAP_SRC_IMPORT_2_2 = 3068 + XML_SCHEMAP_INTERNAL = 3069 # 3069 non-W3C + XML_SCHEMAP_NOT_DETERMINISTIC = 3070 # 3070 non-W3C + XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_1 = 3071 + XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_2 = 3072 + XML_SCHEMAP_SRC_ATTRIBUTE_GROUP_3 = 3073 + XML_SCHEMAP_MG_PROPS_CORRECT_1 = 3074 + XML_SCHEMAP_MG_PROPS_CORRECT_2 = 3075 + XML_SCHEMAP_SRC_CT_1 = 3076 + XML_SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3 = 3077 + XML_SCHEMAP_AU_PROPS_CORRECT_2 = 3078 + XML_SCHEMAP_A_PROPS_CORRECT_2 = 3079 + XML_SCHEMAP_C_PROPS_CORRECT = 3080 + XML_SCHEMAP_SRC_REDEFINE = 3081 + XML_SCHEMAP_SRC_IMPORT = 3082 + XML_SCHEMAP_WARN_SKIP_SCHEMA = 3083 + XML_SCHEMAP_WARN_UNLOCATED_SCHEMA = 3084 + XML_SCHEMAP_WARN_ATTR_REDECL_PROH = 3085 + XML_SCHEMAP_WARN_ATTR_POINTLESS_PROH = 3086 # 3085 + XML_SCHEMAP_AG_PROPS_CORRECT = 3087 # 3086 + XML_SCHEMAP_COS_CT_EXTENDS_1_2 = 3088 # 3087 + XML_SCHEMAP_AU_PROPS_CORRECT = 3089 # 3088 + XML_SCHEMAP_A_PROPS_CORRECT_3 = 3090 # 3089 + XML_SCHEMAP_COS_ALL_LIMITED = 3091 # 3090 + XML_SCHEMATRONV_ASSERT = 4000 + XML_SCHEMATRONV_REPORT = 4001 + XML_MODULE_OPEN = 4900 + XML_MODULE_CLOSE = 4901 + XML_CHECK_FOUND_ELEMENT = 5000 + XML_CHECK_FOUND_ATTRIBUTE = 5001 + XML_CHECK_FOUND_TEXT = 5002 + XML_CHECK_FOUND_CDATA = 5003 + XML_CHECK_FOUND_ENTITYREF = 5004 + XML_CHECK_FOUND_ENTITY = 5005 + XML_CHECK_FOUND_PI = 5006 + XML_CHECK_FOUND_COMMENT = 5007 + XML_CHECK_FOUND_DOCTYPE = 5008 + XML_CHECK_FOUND_FRAGMENT = 5009 + XML_CHECK_FOUND_NOTATION = 5010 + XML_CHECK_UNKNOWN_NODE = 5011 + XML_CHECK_ENTITY_TYPE = 5012 + XML_CHECK_NO_PARENT = 5013 + XML_CHECK_NO_DOC = 5014 + XML_CHECK_NO_NAME = 5015 + XML_CHECK_NO_ELEM = 5016 + XML_CHECK_WRONG_DOC = 5017 + XML_CHECK_NO_PREV = 5018 + XML_CHECK_WRONG_PREV = 5019 + XML_CHECK_NO_NEXT = 5020 + XML_CHECK_WRONG_NEXT = 5021 + XML_CHECK_NOT_DTD = 5022 + XML_CHECK_NOT_ATTR = 5023 + XML_CHECK_NOT_ATTR_DECL = 5024 + XML_CHECK_NOT_ELEM_DECL = 5025 + XML_CHECK_NOT_ENTITY_DECL = 5026 + XML_CHECK_NOT_NS_DECL = 5027 + XML_CHECK_NO_HREF = 5028 + XML_CHECK_WRONG_PARENT = 5029 + XML_CHECK_NS_SCOPE = 5030 + XML_CHECK_NS_ANCESTOR = 5031 + XML_CHECK_NOT_UTF8 = 5032 + XML_CHECK_NO_DICT = 5033 + XML_CHECK_NOT_NCNAME = 5034 + XML_CHECK_OUTSIDE_DICT = 5035 + XML_CHECK_WRONG_NAME = 5036 + XML_CHECK_NAME_NOT_NULL = 5037 + XML_I18N_NO_NAME = 6000 + XML_I18N_NO_HANDLER = 6001 + XML_I18N_EXCESS_HANDLER = 6002 + XML_I18N_CONV_FAILED = 6003 + XML_I18N_NO_OUTPUT = 6004 + XML_BUF_OVERFLOW = 7000 + + ctypedef enum xmlRelaxNGValidErr: + XML_RELAXNG_OK = 0 + XML_RELAXNG_ERR_MEMORY = 1 + XML_RELAXNG_ERR_TYPE = 2 + XML_RELAXNG_ERR_TYPEVAL = 3 + XML_RELAXNG_ERR_DUPID = 4 + XML_RELAXNG_ERR_TYPECMP = 5 + XML_RELAXNG_ERR_NOSTATE = 6 + XML_RELAXNG_ERR_NODEFINE = 7 + XML_RELAXNG_ERR_LISTEXTRA = 8 + XML_RELAXNG_ERR_LISTEMPTY = 9 + XML_RELAXNG_ERR_INTERNODATA = 10 + XML_RELAXNG_ERR_INTERSEQ = 11 + XML_RELAXNG_ERR_INTEREXTRA = 12 + XML_RELAXNG_ERR_ELEMNAME = 13 + XML_RELAXNG_ERR_ATTRNAME = 14 + XML_RELAXNG_ERR_ELEMNONS = 15 + XML_RELAXNG_ERR_ATTRNONS = 16 + XML_RELAXNG_ERR_ELEMWRONGNS = 17 + XML_RELAXNG_ERR_ATTRWRONGNS = 18 + XML_RELAXNG_ERR_ELEMEXTRANS = 19 + XML_RELAXNG_ERR_ATTREXTRANS = 20 + XML_RELAXNG_ERR_ELEMNOTEMPTY = 21 + XML_RELAXNG_ERR_NOELEM = 22 + XML_RELAXNG_ERR_NOTELEM = 23 + XML_RELAXNG_ERR_ATTRVALID = 24 + XML_RELAXNG_ERR_CONTENTVALID = 25 + XML_RELAXNG_ERR_EXTRACONTENT = 26 + XML_RELAXNG_ERR_INVALIDATTR = 27 + XML_RELAXNG_ERR_DATAELEM = 28 + XML_RELAXNG_ERR_VALELEM = 29 + XML_RELAXNG_ERR_LISTELEM = 30 + XML_RELAXNG_ERR_DATATYPE = 31 + XML_RELAXNG_ERR_VALUE = 32 + XML_RELAXNG_ERR_LIST = 33 + XML_RELAXNG_ERR_NOGRAMMAR = 34 + XML_RELAXNG_ERR_EXTRADATA = 35 + XML_RELAXNG_ERR_LACKDATA = 36 + XML_RELAXNG_ERR_INTERNAL = 37 + XML_RELAXNG_ERR_ELEMWRONG = 38 + XML_RELAXNG_ERR_TEXTWRONG = 39 +# --- END: GENERATED CONSTANTS --- + +cdef extern from "libxml/xmlerror.h" nogil: + ctypedef struct xmlError: + int domain + int code + char* message + xmlErrorLevel level + char* file + char* str1 + char* str2 + char* str3 + int line + int int1 + int int2 + void* node + + ctypedef void (*xmlGenericErrorFunc)(void* ctxt, char* msg, ...) noexcept + ctypedef void (*xmlStructuredErrorFunc)(void* userData, + const xmlError* error) noexcept + + cdef void xmlSetGenericErrorFunc( + void* ctxt, xmlGenericErrorFunc func) + cdef void xmlSetStructuredErrorFunc( + void* ctxt, xmlStructuredErrorFunc func) + +cdef extern from "libxml/globals.h" nogil: + cdef xmlStructuredErrorFunc xmlStructuredError + cdef void* xmlStructuredErrorContext diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/xmlparser.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/xmlparser.pxd new file mode 100644 index 00000000..a43c74cf --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/xmlparser.pxd @@ -0,0 +1,265 @@ +from libc.string cimport const_char + +from lxml.includes.tree cimport ( + xmlDoc, xmlNode, xmlEntity, xmlDict, xmlDtd, xmlChar, const_xmlChar) +from lxml.includes.tree cimport xmlInputReadCallback, xmlInputCloseCallback +from lxml.includes.xmlerror cimport xmlError, xmlStructuredErrorFunc, xmlErrorLevel + + +cdef extern from "libxml/parser.h" nogil: + ctypedef void (*startElementNsSAX2Func)(void* ctx, + const_xmlChar* localname, + const_xmlChar* prefix, + const_xmlChar* URI, + int nb_namespaces, + const_xmlChar** namespaces, + int nb_attributes, + int nb_defaulted, + const_xmlChar** attributes) noexcept + + ctypedef void (*endElementNsSAX2Func)(void* ctx, + const_xmlChar* localname, + const_xmlChar* prefix, + const_xmlChar* URI) noexcept + + ctypedef void (*startElementSAXFunc)(void* ctx, const_xmlChar* name, const_xmlChar** atts) noexcept + + ctypedef void (*endElementSAXFunc)(void* ctx, const_xmlChar* name) noexcept + + ctypedef void (*charactersSAXFunc)(void* ctx, const_xmlChar* ch, int len) noexcept + + ctypedef void (*cdataBlockSAXFunc)(void* ctx, const_xmlChar* value, int len) noexcept + + ctypedef void (*commentSAXFunc)(void* ctx, const_xmlChar* value) noexcept + + ctypedef void (*processingInstructionSAXFunc)(void* ctx, + const_xmlChar* target, + const_xmlChar* data) noexcept + + ctypedef void (*internalSubsetSAXFunc)(void* ctx, + const_xmlChar* name, + const_xmlChar* externalID, + const_xmlChar* systemID) noexcept + + ctypedef void (*endDocumentSAXFunc)(void* ctx) noexcept + + ctypedef void (*startDocumentSAXFunc)(void* ctx) noexcept + + ctypedef void (*referenceSAXFunc)(void * ctx, const_xmlChar* name) noexcept + + ctypedef xmlEntity* (*getEntitySAXFunc)(void* ctx, const_xmlChar* name) noexcept + + cdef int XML_SAX2_MAGIC + +cdef extern from "libxml/tree.h" nogil: + ctypedef struct xmlParserInput: + int line + int col + int length + const_xmlChar* base + const_xmlChar* cur + const_xmlChar* end + const_char *filename + + ctypedef struct xmlParserInputBuffer: + void* context + xmlInputReadCallback readcallback + xmlInputCloseCallback closecallback + + ctypedef struct xmlSAXHandlerV1: + # same as xmlSAXHandler, but without namespaces + pass + + ctypedef struct xmlSAXHandler: + internalSubsetSAXFunc internalSubset + startElementNsSAX2Func startElementNs + endElementNsSAX2Func endElementNs + startElementSAXFunc startElement + endElementSAXFunc endElement + charactersSAXFunc characters + cdataBlockSAXFunc cdataBlock + referenceSAXFunc reference + getEntitySAXFunc getEntity + commentSAXFunc comment + processingInstructionSAXFunc processingInstruction + startDocumentSAXFunc startDocument + endDocumentSAXFunc endDocument + int initialized + xmlStructuredErrorFunc serror + void* _private + + +cdef extern from "libxml/SAX2.h" nogil: + cdef void xmlSAX2StartDocument(void* ctxt) + + +cdef extern from "libxml/xmlIO.h" nogil: + cdef xmlParserInputBuffer* xmlAllocParserInputBuffer(int enc) + + +cdef extern from "libxml/parser.h" nogil: + + cdef xmlDict* xmlDictCreate() + cdef xmlDict* xmlDictCreateSub(xmlDict* subdict) + cdef void xmlDictFree(xmlDict* sub) + cdef int xmlDictReference(xmlDict* dict) + + cdef int XML_COMPLETE_ATTRS # SAX option for adding DTD default attributes + cdef int XML_SKIP_IDS # SAX option for not building an XML ID dict + + ctypedef enum xmlParserInputState: + XML_PARSER_EOF = -1 # nothing is to be parsed + XML_PARSER_START = 0 # nothing has been parsed + XML_PARSER_MISC = 1 # Misc* before int subset + XML_PARSER_PI = 2 # Within a processing instruction + XML_PARSER_DTD = 3 # within some DTD content + XML_PARSER_PROLOG = 4 # Misc* after internal subset + XML_PARSER_COMMENT = 5 # within a comment + XML_PARSER_START_TAG = 6 # within a start tag + XML_PARSER_CONTENT = 7 # within the content + XML_PARSER_CDATA_SECTION = 8 # within a CDATA section + XML_PARSER_END_TAG = 9 # within a closing tag + XML_PARSER_ENTITY_DECL = 10 # within an entity declaration + XML_PARSER_ENTITY_VALUE = 11 # within an entity value in a decl + XML_PARSER_ATTRIBUTE_VALUE = 12 # within an attribute value + XML_PARSER_SYSTEM_LITERAL = 13 # within a SYSTEM value + XML_PARSER_EPILOG = 14 # the Misc* after the last end tag + XML_PARSER_IGNORE = 15 # within an IGNORED section + XML_PARSER_PUBLIC_LITERAL = 16 # within a PUBLIC value + + + ctypedef struct xmlParserCtxt: + xmlDoc* myDoc + xmlDict* dict + int dictNames + void* _private + bint wellFormed + bint recovery + int options + bint disableSAX + int errNo + xmlParserInputState instate + bint replaceEntities + int loadsubset # != 0 if enabled, int value == why + bint validate + xmlError lastError + xmlNode* node + xmlSAXHandler* sax + void* userData + int* spaceTab + int spaceMax + int nsNr + bint html + bint progressive + int inSubset + int charset + xmlParserInput* input + int inputNr + xmlParserInput* inputTab[] + + ctypedef enum xmlParserOption: + XML_PARSE_RECOVER = 1 # recover on errors + XML_PARSE_NOENT = 2 # substitute entities + XML_PARSE_DTDLOAD = 4 # load the external subset + XML_PARSE_DTDATTR = 8 # default DTD attributes + XML_PARSE_DTDVALID = 16 # validate with the DTD + XML_PARSE_NOERROR = 32 # suppress error reports + XML_PARSE_NOWARNING = 64 # suppress warning reports + XML_PARSE_PEDANTIC = 128 # pedantic error reporting + XML_PARSE_NOBLANKS = 256 # remove blank nodes + XML_PARSE_SAX1 = 512 # use the SAX1 interface internally + XML_PARSE_XINCLUDE = 1024 # Implement XInclude substitution + XML_PARSE_NONET = 2048 # Forbid network access + XML_PARSE_NODICT = 4096 # Do not reuse the context dictionary + XML_PARSE_NSCLEAN = 8192 # remove redundant namespaces declarations + XML_PARSE_NOCDATA = 16384 # merge CDATA as text nodes + XML_PARSE_NOXINCNODE = 32768 # do not generate XINCLUDE START/END nodes + # libxml2 2.6.21+ only: + XML_PARSE_COMPACT = 65536 # compact small text nodes + # libxml2 2.7.0+ only: + XML_PARSE_OLD10 = 131072 # parse using XML-1.0 before update 5 + XML_PARSE_NOBASEFIX = 262144 # do not fixup XINCLUDE xml:base uris + XML_PARSE_HUGE = 524288 # relax any hardcoded limit from the parser + # libxml2 2.7.3+ only: + XML_PARSE_OLDSAX = 1048576 # parse using SAX2 interface before 2.7.0 + # libxml2 2.8.0+ only: + XML_PARSE_IGNORE_ENC = 2097152 # ignore internal document encoding hint + # libxml2 2.9.0+ only: + XML_PARSE_BIG_LINES = 4194304 # Store big lines numbers in text PSVI field + + cdef void xmlInitParser() + cdef void xmlCleanupParser() + + cdef int xmlLineNumbersDefault(int onoff) + cdef xmlParserCtxt* xmlNewParserCtxt() + cdef xmlParserInput* xmlNewIOInputStream(xmlParserCtxt* ctxt, + xmlParserInputBuffer* input, + int enc) + cdef int xmlCtxtUseOptions(xmlParserCtxt* ctxt, int options) + cdef void xmlFreeParserCtxt(xmlParserCtxt* ctxt) + cdef void xmlCtxtReset(xmlParserCtxt* ctxt) + cdef void xmlClearParserCtxt(xmlParserCtxt* ctxt) + cdef int xmlParseChunk(xmlParserCtxt* ctxt, + char* chunk, int size, int terminate) + cdef xmlDoc* xmlCtxtReadDoc(xmlParserCtxt* ctxt, + char* cur, char* URL, char* encoding, + int options) + cdef xmlDoc* xmlCtxtReadFile(xmlParserCtxt* ctxt, + char* filename, char* encoding, + int options) + cdef xmlDoc* xmlCtxtReadIO(xmlParserCtxt* ctxt, + xmlInputReadCallback ioread, + xmlInputCloseCallback ioclose, + void* ioctx, + char* URL, char* encoding, + int options) + cdef xmlDoc* xmlCtxtReadMemory(xmlParserCtxt* ctxt, + char* buffer, int size, + char* filename, const_char* encoding, + int options) + + cdef void xmlErrParser(xmlParserCtxt* ctxt, xmlNode* node, + int domain, int code, xmlErrorLevel level, + const xmlChar *str1, const xmlChar *str2, const xmlChar *str3, + int int1, const char *msg, ...) + + +# iterparse: + + cdef xmlParserCtxt* xmlCreatePushParserCtxt(xmlSAXHandler* sax, + void* user_data, + char* chunk, + int size, + char* filename) + + cdef int xmlCtxtResetPush(xmlParserCtxt* ctxt, + char* chunk, + int size, + char* filename, + char* encoding) + +# entity loaders: + + ctypedef xmlParserInput* (*xmlExternalEntityLoader)( + const_char * URL, const_char * ID, xmlParserCtxt* context) noexcept + cdef xmlExternalEntityLoader xmlGetExternalEntityLoader() + cdef void xmlSetExternalEntityLoader(xmlExternalEntityLoader f) + + cdef xmlEntity* xmlSAX2GetEntity(void* ctxt, const_xmlChar* name) noexcept + +# DTDs: + + cdef xmlDtd* xmlParseDTD(const_xmlChar* ExternalID, const_xmlChar* SystemID) + cdef xmlDtd* xmlIOParseDTD(xmlSAXHandler* sax, + xmlParserInputBuffer* input, + int enc) + + +cdef extern from "libxml/parserInternals.h" nogil: + cdef xmlParserInput* xmlNewInputStream(xmlParserCtxt* ctxt) + cdef xmlParserInput* xmlNewStringInputStream(xmlParserCtxt* ctxt, + char* buffer) + cdef xmlParserInput* xmlNewInputFromFile(xmlParserCtxt* ctxt, + char* filename) + cdef void xmlFreeInputStream(xmlParserInput* input) + cdef int xmlSwitchEncoding(xmlParserCtxt* ctxt, int enc) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/xmlschema.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/xmlschema.pxd new file mode 100644 index 00000000..06741111 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/xmlschema.pxd @@ -0,0 +1,35 @@ +from lxml.includes.tree cimport xmlDoc +from lxml.includes.xmlparser cimport xmlSAXHandler +from lxml.includes.xmlerror cimport xmlStructuredErrorFunc + +cdef extern from "libxml/xmlschemas.h" nogil: + ctypedef struct xmlSchema + ctypedef struct xmlSchemaParserCtxt + + ctypedef struct xmlSchemaSAXPlugStruct + ctypedef struct xmlSchemaValidCtxt + + ctypedef enum xmlSchemaValidOption: + XML_SCHEMA_VAL_VC_I_CREATE = 1 + + cdef xmlSchemaValidCtxt* xmlSchemaNewValidCtxt(xmlSchema* schema) nogil + cdef void xmlSchemaSetParserStructuredErrors(xmlSchemaParserCtxt* ctxt, + xmlStructuredErrorFunc serror, void *ctx) + cdef void xmlSchemaSetValidStructuredErrors(xmlSchemaValidCtxt* ctxt, + xmlStructuredErrorFunc serror, void *ctx) + + cdef int xmlSchemaValidateDoc(xmlSchemaValidCtxt* ctxt, xmlDoc* doc) nogil + cdef xmlSchema* xmlSchemaParse(xmlSchemaParserCtxt* ctxt) nogil + cdef xmlSchemaParserCtxt* xmlSchemaNewParserCtxt(char* URL) nogil + cdef xmlSchemaParserCtxt* xmlSchemaNewDocParserCtxt(xmlDoc* doc) nogil + cdef void xmlSchemaFree(xmlSchema* schema) nogil + cdef void xmlSchemaFreeParserCtxt(xmlSchemaParserCtxt* ctxt) nogil + cdef void xmlSchemaFreeValidCtxt(xmlSchemaValidCtxt* ctxt) nogil + cdef int xmlSchemaSetValidOptions(xmlSchemaValidCtxt* ctxt, + int options) nogil + + cdef xmlSchemaSAXPlugStruct* xmlSchemaSAXPlug(xmlSchemaValidCtxt* ctxt, + xmlSAXHandler** sax, + void** data) nogil + cdef int xmlSchemaSAXUnplug(xmlSchemaSAXPlugStruct* sax_plug) + cdef int xmlSchemaIsValid(xmlSchemaValidCtxt* ctxt) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/xpath.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/xpath.pxd new file mode 100644 index 00000000..22069eb7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/xpath.pxd @@ -0,0 +1,136 @@ +from lxml.includes cimport tree +from lxml.includes cimport xmlerror + +from libc.string cimport const_char +from lxml.includes.tree cimport xmlChar, const_xmlChar + + +cdef extern from "libxml/xpath.h" nogil: + ctypedef enum xmlXPathObjectType: + XPATH_UNDEFINED = 0 + XPATH_NODESET = 1 + XPATH_BOOLEAN = 2 + XPATH_NUMBER = 3 + XPATH_STRING = 4 + XPATH_POINT = 5 + XPATH_RANGE = 6 + XPATH_LOCATIONSET = 7 + XPATH_USERS = 8 + XPATH_XSLT_TREE = 9 + + ctypedef enum xmlXPathError: + XPATH_EXPRESSION_OK = 0 + XPATH_NUMBER_ERROR = 1 + XPATH_UNFINISHED_LITERAL_ERROR = 2 + XPATH_START_LITERAL_ERROR = 3 + XPATH_VARIABLE_REF_ERROR = 4 + XPATH_UNDEF_VARIABLE_ERROR = 5 + XPATH_INVALID_PREDICATE_ERROR = 6 + XPATH_EXPR_ERROR = 7 + XPATH_UNCLOSED_ERROR = 8 + XPATH_UNKNOWN_FUNC_ERROR = 9 + XPATH_INVALID_OPERAND = 10 + XPATH_INVALID_TYPE = 11 + XPATH_INVALID_ARITY = 12 + XPATH_INVALID_CTXT_SIZE = 13 + XPATH_INVALID_CTXT_POSITION = 14 + XPATH_MEMORY_ERROR = 15 + XPTR_SYNTAX_ERROR = 16 + XPTR_RESOURCE_ERROR = 17 + XPTR_SUB_RESOURCE_ERROR = 18 + XPATH_UNDEF_PREFIX_ERROR = 19 + XPATH_ENCODING_ERROR = 20 + XPATH_INVALID_CHAR_ERROR = 21 + XPATH_INVALID_CTXT = 22 + + ctypedef struct xmlNodeSet: + int nodeNr + int nodeMax + tree.xmlNode** nodeTab + + ctypedef struct xmlXPathObject: + xmlXPathObjectType type + xmlNodeSet* nodesetval + bint boolval + double floatval + xmlChar* stringval + + ctypedef struct xmlXPathContext: + tree.xmlDoc* doc + tree.xmlNode* node + tree.xmlDict* dict + tree.xmlHashTable* nsHash + const_xmlChar* function + const_xmlChar* functionURI + xmlerror.xmlStructuredErrorFunc error + xmlerror.xmlError lastError + void* userData + + ctypedef struct xmlXPathParserContext: + xmlXPathContext* context + xmlXPathObject* value + tree.xmlNode* ancestor + int error + + ctypedef struct xmlXPathCompExpr + + ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs) + ctypedef xmlXPathFunction (*xmlXPathFuncLookupFunc)(void* ctxt, + const_xmlChar* name, + const_xmlChar* ns_uri) + + cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc) + cdef xmlXPathObject* xmlXPathEvalExpression(const_xmlChar* str, + xmlXPathContext* ctxt) + cdef xmlXPathObject* xmlXPathCompiledEval(xmlXPathCompExpr* comp, + xmlXPathContext* ctxt) + cdef xmlXPathCompExpr* xmlXPathCompile(const_xmlChar* str) + cdef xmlXPathCompExpr* xmlXPathCtxtCompile(xmlXPathContext* ctxt, + const_xmlChar* str) + cdef void xmlXPathFreeContext(xmlXPathContext* ctxt) + cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp) + cdef void xmlXPathFreeObject(xmlXPathObject* obj) + cdef int xmlXPathRegisterNs(xmlXPathContext* ctxt, + const_xmlChar* prefix, const_xmlChar* ns_uri) + + cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val) + cdef void xmlXPathFreeNodeSet(xmlNodeSet* val) + + +cdef extern from "libxml/xpathInternals.h" nogil: + cdef int xmlXPathRegisterFunc(xmlXPathContext* ctxt, + const_xmlChar* name, + xmlXPathFunction f) + cdef int xmlXPathRegisterFuncNS(xmlXPathContext* ctxt, + const_xmlChar* name, + const_xmlChar* ns_uri, + xmlXPathFunction f) + cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt, + xmlXPathFuncLookupFunc f, + void *funcCtxt) + cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt, + const_xmlChar* name, + xmlXPathObject* value) + cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt, + const_xmlChar* name, + const_xmlChar* ns_uri, + xmlXPathObject* value) + cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt) + cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt) + cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt) + cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value) + + cdef xmlXPathObject* xmlXPathNewCString(const_char *val) + cdef xmlXPathObject* xmlXPathWrapCString(const_char * val) + cdef xmlXPathObject* xmlXPathNewString(const_xmlChar *val) + cdef xmlXPathObject* xmlXPathWrapString(const_xmlChar * val) + cdef xmlXPathObject* xmlXPathNewFloat(double val) + cdef xmlXPathObject* xmlXPathNewBoolean(int val) + cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val) + cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val) + cdef void xmlXPathNodeSetAdd(xmlNodeSet* cur, + tree.xmlNode* val) + cdef void xmlXPathNodeSetAddUnique(xmlNodeSet* cur, + tree.xmlNode* val) + cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val) + cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error) diff --git a/.venv/lib/python3.12/site-packages/lxml/includes/xslt.pxd b/.venv/lib/python3.12/site-packages/lxml/includes/xslt.pxd new file mode 100644 index 00000000..abafe432 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/includes/xslt.pxd @@ -0,0 +1,190 @@ +from lxml.includes.tree cimport xmlDoc, xmlNode, xmlDict, xmlChar, const_xmlChar, xmlOutputBuffer +from lxml.includes.xmlerror cimport xmlGenericErrorFunc +from lxml.includes.xpath cimport xmlXPathContext, xmlXPathFunction + +from libc.string cimport const_char + +cdef extern from "libxslt/xslt.h": + cdef int xsltLibxsltVersion + cdef int xsltMaxDepth + +cdef extern from "libxslt/xsltconfig.h": + cdef int LIBXSLT_VERSION + +cdef extern from "libxslt/xsltInternals.h" nogil: + ctypedef enum xsltTransformState: + XSLT_STATE_OK # 0 + XSLT_STATE_ERROR # 1 + XSLT_STATE_STOPPED # 2 + + ctypedef struct xsltDocument: + xmlDoc* doc + + ctypedef struct xsltStylesheet: + xmlChar* encoding + xmlDoc* doc + int errors + + ctypedef struct xsltTransformContext: + xsltStylesheet* style + xmlXPathContext* xpathCtxt + xsltDocument* document + void* _private + xmlDict* dict + int profile + xmlNode* node + xmlDoc* output + xmlNode* insert + xmlNode* inst + xsltTransformState state + + ctypedef struct xsltStackElem + + ctypedef struct xsltTemplate + + cdef xsltStylesheet* xsltParseStylesheetDoc(xmlDoc* doc) + cdef void xsltFreeStylesheet(xsltStylesheet* sheet) + +cdef extern from "libxslt/imports.h" nogil: + # actually defined in "etree_defs.h" + cdef void LXML_GET_XSLT_ENCODING(const_xmlChar* result_var, xsltStylesheet* style) + +cdef extern from "libxslt/extensions.h" nogil: + ctypedef void (*xsltTransformFunction)(xsltTransformContext* ctxt, + xmlNode* context_node, + xmlNode* inst, + void* precomp_unused) noexcept + + cdef int xsltRegisterExtFunction(xsltTransformContext* ctxt, + const_xmlChar* name, + const_xmlChar* URI, + xmlXPathFunction function) + cdef int xsltRegisterExtModuleFunction(const_xmlChar* name, const_xmlChar* URI, + xmlXPathFunction function) + cdef int xsltUnregisterExtModuleFunction(const_xmlChar* name, const_xmlChar* URI) + cdef xmlXPathFunction xsltExtModuleFunctionLookup( + const_xmlChar* name, const_xmlChar* URI) + cdef int xsltRegisterExtPrefix(xsltStylesheet* style, + const_xmlChar* prefix, const_xmlChar* URI) + cdef int xsltRegisterExtElement(xsltTransformContext* ctxt, + const_xmlChar* name, const_xmlChar* URI, + xsltTransformFunction function) + +cdef extern from "libxslt/documents.h" nogil: + ctypedef enum xsltLoadType: + XSLT_LOAD_START + XSLT_LOAD_STYLESHEET + XSLT_LOAD_DOCUMENT + + ctypedef xmlDoc* (*xsltDocLoaderFunc)(const_xmlChar* URI, xmlDict* dict, + int options, + void* ctxt, + xsltLoadType type) noexcept + cdef xsltDocLoaderFunc xsltDocDefaultLoader + cdef void xsltSetLoaderFunc(xsltDocLoaderFunc f) + +cdef extern from "libxslt/transform.h" nogil: + cdef xmlDoc* xsltApplyStylesheet(xsltStylesheet* style, xmlDoc* doc, + const_char** params) + cdef xmlDoc* xsltApplyStylesheetUser(xsltStylesheet* style, xmlDoc* doc, + const_char** params, const_char* output, + void* profile, + xsltTransformContext* context) + cdef void xsltProcessOneNode(xsltTransformContext* ctxt, + xmlNode* contextNode, + xsltStackElem* params) + cdef xsltTransformContext* xsltNewTransformContext(xsltStylesheet* style, + xmlDoc* doc) + cdef void xsltFreeTransformContext(xsltTransformContext* context) + cdef void xsltApplyOneTemplate(xsltTransformContext* ctxt, + xmlNode* contextNode, xmlNode* list, + xsltTemplate* templ, + xsltStackElem* params) + + +cdef extern from "libxslt/xsltutils.h" nogil: + cdef int xsltSaveResultToString(xmlChar** doc_txt_ptr, + int* doc_txt_len, + xmlDoc* result, + xsltStylesheet* style) + cdef int xsltSaveResultToFilename(const_char *URL, + xmlDoc* result, + xsltStylesheet* style, + int compression) + cdef int xsltSaveResultTo(xmlOutputBuffer* buf, + xmlDoc* result, + xsltStylesheet* style) + cdef xmlGenericErrorFunc xsltGenericError + cdef void *xsltGenericErrorContext + cdef void xsltSetGenericErrorFunc( + void* ctxt, void (*handler)(void* ctxt, char* msg, ...) nogil) + cdef void xsltSetTransformErrorFunc( + xsltTransformContext*, void* ctxt, + void (*handler)(void* ctxt, char* msg, ...) nogil) + cdef void xsltTransformError(xsltTransformContext* ctxt, + xsltStylesheet* style, + xmlNode* node, char* msg, ...) + cdef void xsltSetCtxtParseOptions( + xsltTransformContext* ctxt, int options) + + +cdef extern from "libxslt/security.h" nogil: + ctypedef struct xsltSecurityPrefs + ctypedef enum xsltSecurityOption: + XSLT_SECPREF_READ_FILE = 1 + XSLT_SECPREF_WRITE_FILE = 2 + XSLT_SECPREF_CREATE_DIRECTORY = 3 + XSLT_SECPREF_READ_NETWORK = 4 + XSLT_SECPREF_WRITE_NETWORK = 5 + + ctypedef int (*xsltSecurityCheck)(xsltSecurityPrefs* sec, + xsltTransformContext* ctxt, + char* value) noexcept + + cdef xsltSecurityPrefs* xsltNewSecurityPrefs() + cdef void xsltFreeSecurityPrefs(xsltSecurityPrefs* sec) + cdef int xsltSecurityForbid(xsltSecurityPrefs* sec, + xsltTransformContext* ctxt, + char* value) + cdef int xsltSecurityAllow(xsltSecurityPrefs* sec, + xsltTransformContext* ctxt, + char* value) + cdef int xsltSetSecurityPrefs(xsltSecurityPrefs* sec, + xsltSecurityOption option, + xsltSecurityCheck func) + cdef xsltSecurityCheck xsltGetSecurityPrefs( + xsltSecurityPrefs* sec, + xsltSecurityOption option) + cdef int xsltSetCtxtSecurityPrefs(xsltSecurityPrefs* sec, + xsltTransformContext* ctxt) + cdef xmlDoc* xsltGetProfileInformation(xsltTransformContext* ctxt) + +cdef extern from "libxslt/variables.h" nogil: + cdef int xsltQuoteUserParams(xsltTransformContext* ctxt, + const_char** params) + cdef int xsltQuoteOneUserParam(xsltTransformContext* ctxt, + const_xmlChar* name, + const_xmlChar* value) + +cdef extern from "libxslt/extra.h" nogil: + const_xmlChar* XSLT_LIBXSLT_NAMESPACE + const_xmlChar* XSLT_XALAN_NAMESPACE + const_xmlChar* XSLT_SAXON_NAMESPACE + const_xmlChar* XSLT_XT_NAMESPACE + + cdef xmlXPathFunction xsltFunctionNodeSet + cdef void xsltRegisterAllExtras() + +cdef extern from "libexslt/exslt.h" nogil: + cdef void exsltRegisterAll() + + # libexslt 1.1.25+ + const_xmlChar* EXSLT_DATE_NAMESPACE + const_xmlChar* EXSLT_SETS_NAMESPACE + const_xmlChar* EXSLT_MATH_NAMESPACE + const_xmlChar* EXSLT_STRINGS_NAMESPACE + + cdef int exsltDateXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix) + cdef int exsltSetsXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix) + cdef int exsltMathXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix) + cdef int exsltStrXpathCtxtRegister(xmlXPathContext* ctxt, const_xmlChar* prefix) diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/__init__.py b/.venv/lib/python3.12/site-packages/lxml/isoschematron/__init__.py new file mode 100644 index 00000000..ac89fb62 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/__init__.py @@ -0,0 +1,348 @@ +"""The ``lxml.isoschematron`` package implements ISO Schematron support on top +of the pure-xslt 'skeleton' implementation. +""" + +import sys +import os.path +from lxml import etree as _etree # due to validator __init__ signature + + +# some compat stuff, borrowed from lxml.html +try: + unicode +except NameError: + # Python 3 + unicode = str +try: + basestring +except NameError: + # Python 3 + basestring = str + + +__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include', + 'iso_abstract_expand', 'iso_svrl_for_xslt1', + 'svrl_validation_errors', 'schematron_schema_valid', + 'stylesheet_params', 'Schematron'] + + +# some namespaces +#FIXME: Maybe lxml should provide a dedicated place for common namespace +#FIXME: definitions? +XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema" +RELAXNG_NS = "http://relaxng.org/ns/structure/1.0" +SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron" +SVRL_NS = "http://purl.oclc.org/dsdl/svrl" + + +# some helpers +_schematron_root = '{%s}schema' % SCHEMATRON_NS +_xml_schema_root = '{%s}schema' % XML_SCHEMA_NS +_resources_dir = os.path.join(os.path.dirname(__file__), 'resources') + + +# the iso-schematron skeleton implementation steps aka xsl transformations +extract_xsd = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl'))) +extract_rng = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl'))) +iso_dsdl_include = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', + 'iso_dsdl_include.xsl'))) +iso_abstract_expand = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1', + 'iso_abstract_expand.xsl'))) +iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse( + os.path.join(_resources_dir, + 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl'))) + + +# svrl result accessors +svrl_validation_errors = _etree.XPath( + '//svrl:failed-assert', namespaces={'svrl': SVRL_NS}) + +# RelaxNG validator for schematron schemas +schematron_schema_valid_supported = False +try: + schematron_schema_valid = _etree.RelaxNG( + file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng')) + schematron_schema_valid_supported = True +except _etree.RelaxNGParseError: + # Some distributions delete the file due to licensing issues. + def schematron_schema_valid(arg): + raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng") + + +def stylesheet_params(**kwargs): + """Convert keyword args to a dictionary of stylesheet parameters. + XSL stylesheet parameters must be XPath expressions, i.e.: + + * string expressions, like "'5'" + * simple (number) expressions, like "5" + * valid XPath expressions, like "/a/b/text()" + + This function converts native Python keyword arguments to stylesheet + parameters following these rules: + If an arg is a string wrap it with XSLT.strparam(). + If an arg is an XPath object use its path string. + If arg is None raise TypeError. + Else convert arg to string. + """ + result = {} + for key, val in kwargs.items(): + if isinstance(val, basestring): + val = _etree.XSLT.strparam(val) + elif val is None: + raise TypeError('None not allowed as a stylesheet parameter') + elif not isinstance(val, _etree.XPath): + val = unicode(val) + result[key] = val + return result + + +# helper function for use in Schematron __init__ +def _stylesheet_param_dict(paramsDict, kwargsDict): + """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as + stylesheet arguments. + kwargsDict entries with a value of None are ignored. + """ + # beware of changing mutable default arg + paramsDict = dict(paramsDict) + for k, v in kwargsDict.items(): + if v is not None: # None values do not override + paramsDict[k] = v + paramsDict = stylesheet_params(**paramsDict) + return paramsDict + + +class Schematron(_etree._Validator): + """An ISO Schematron validator. + + Pass a root Element or an ElementTree to turn it into a validator. + Alternatively, pass a filename as keyword argument 'file' to parse from + the file system. + + Schematron is a less well known, but very powerful schema language. + The main idea is to use the capabilities of XPath to put restrictions on + the structure and the content of XML documents. + + The standard behaviour is to fail on ``failed-assert`` findings only + (``ASSERTS_ONLY``). To change this, you can either pass a report filter + function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS`` + or a custom ``XPath`` object), or subclass isoschematron.Schematron for + complete control of the validation process. + + Built on the Schematron language 'reference' skeleton pure-xslt + implementation, the validator is created as an XSLT 1.0 stylesheet using + these steps: + + 0) (Extract from XML Schema or RelaxNG schema) + 1) Process inclusions + 2) Process abstract patterns + 3) Compile the schematron schema to XSLT + + The ``include`` and ``expand`` keyword arguments can be used to switch off + steps 1) and 2). + To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the + keyword arguments ``include_params``, ``expand_params`` or + ``compile_params``. + For convenience, the compile-step parameter ``phase`` is also exposed as a + keyword argument ``phase``. This takes precedence if the parameter is also + given in the parameter dictionary. + + If ``store_schematron`` is set to True, the (included-and-expanded) + schematron document tree is stored and available through the ``schematron`` + property. + If ``store_xslt`` is set to True, the validation XSLT document tree will be + stored and can be retrieved through the ``validator_xslt`` property. + With ``store_report`` set to True (default: False), the resulting validation + report document gets stored and can be accessed as the ``validation_report`` + property. + + If ``validate_schema`` is set to False, the validation of the schema file + itself is disabled. Validation happens by default after building the full + schema, unless the schema validation file cannot be found at import time, + in which case the validation gets disabled. Some lxml distributions exclude + this file due to licensing issues. ISO-Schematron validation can then still + be used normally, but the schemas themselves cannot be validated. + + Here is a usage example:: + + >>> from lxml import etree + >>> from lxml.isoschematron import Schematron + + >>> schematron = Schematron(etree.XML(''' + ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" > + ... <pattern id="id_only_attribute"> + ... <title>id is the only permitted attribute name</title> + ... <rule context="*"> + ... <report test="@*[not(name()='id')]">Attribute + ... <name path="@*[not(name()='id')]"/> is forbidden<name/> + ... </report> + ... </rule> + ... </pattern> + ... </schema>'''), + ... error_finder=Schematron.ASSERTS_AND_REPORTS) + + >>> xml = etree.XML(''' + ... <AAA name="aaa"> + ... <BBB id="bbb"/> + ... <CCC color="ccc"/> + ... </AAA> + ... ''') + + >>> schematron.validate(xml) + False + + >>> xml = etree.XML(''' + ... <AAA id="aaa"> + ... <BBB id="bbb"/> + ... <CCC/> + ... </AAA> + ... ''') + + >>> schematron.validate(xml) + True + """ + + # libxml2 error categorization for validation errors + _domain = _etree.ErrorDomains.SCHEMATRONV + _level = _etree.ErrorLevels.ERROR + _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT + + # convenience definitions for common behaviours + ASSERTS_ONLY = svrl_validation_errors # Default + ASSERTS_AND_REPORTS = _etree.XPath( + '//svrl:failed-assert | //svrl:successful-report', + namespaces={'svrl': SVRL_NS}) + + def _extract(self, element): + """Extract embedded schematron schema from non-schematron host schema. + This method will only be called by __init__ if the given schema document + is not a schematron schema by itself. + Must return a schematron schema document tree or None. + """ + schematron = None + if element.tag == _xml_schema_root: + schematron = self._extract_xsd(element) + elif element.nsmap.get(element.prefix) == RELAXNG_NS: + # RelaxNG does not have a single unique root element + schematron = self._extract_rng(element) + return schematron + + # customization points + # etree.XSLT objects that provide the extract, include, expand, compile + # steps + _extract_xsd = extract_xsd + _extract_rng = extract_rng + _include = iso_dsdl_include + _expand = iso_abstract_expand + _compile = iso_svrl_for_xslt1 + + # etree.xpath object that determines input document validity when applied to + # the svrl result report; must return a list of result elements (empty if + # valid) + _validation_errors = ASSERTS_ONLY + + def __init__(self, etree=None, file=None, include=True, expand=True, + include_params={}, expand_params={}, compile_params={}, + store_schematron=False, store_xslt=False, store_report=False, + phase=None, error_finder=ASSERTS_ONLY, + validate_schema=schematron_schema_valid_supported): + super().__init__() + + self._store_report = store_report + self._schematron = None + self._validator_xslt = None + self._validation_report = None + if error_finder is not self.ASSERTS_ONLY: + self._validation_errors = error_finder + + # parse schema document, may be a schematron schema or an XML Schema or + # a RelaxNG schema with embedded schematron rules + root = None + try: + if etree is not None: + if _etree.iselement(etree): + root = etree + else: + root = etree.getroot() + elif file is not None: + root = _etree.parse(file).getroot() + except Exception: + raise _etree.SchematronParseError( + "No tree or file given: %s" % sys.exc_info()[1]) + if root is None: + raise ValueError("Empty tree") + if root.tag == _schematron_root: + schematron = root + else: + schematron = self._extract(root) + if schematron is None: + raise _etree.SchematronParseError( + "Document is not a schematron schema or schematron-extractable") + # perform the iso-schematron skeleton implementation steps to get a + # validating xslt + if include: + schematron = self._include(schematron, **include_params) + if expand: + schematron = self._expand(schematron, **expand_params) + if validate_schema and not schematron_schema_valid(schematron): + raise _etree.SchematronParseError( + "invalid schematron schema: %s" % + schematron_schema_valid.error_log) + if store_schematron: + self._schematron = schematron + # add new compile keyword args here if exposing them + compile_kwargs = {'phase': phase} + compile_params = _stylesheet_param_dict(compile_params, compile_kwargs) + validator_xslt = self._compile(schematron, **compile_params) + if store_xslt: + self._validator_xslt = validator_xslt + self._validator = _etree.XSLT(validator_xslt) + + def __call__(self, etree): + """Validate doc using Schematron. + + Returns true if document is valid, false if not. + """ + self._clear_error_log() + result = self._validator(etree) + if self._store_report: + self._validation_report = result + errors = self._validation_errors(result) + if errors: + if _etree.iselement(etree): + fname = etree.getroottree().docinfo.URL or '<file>' + else: + fname = etree.docinfo.URL or '<file>' + for error in errors: + # Does svrl report the line number, anywhere? Don't think so. + self._append_log_message( + domain=self._domain, type=self._error_type, + level=self._level, line=0, + message=_etree.tostring(error, encoding='unicode'), + filename=fname) + return False + return True + + @property + def schematron(self): + """ISO-schematron schema document (None if object has been initialized + with store_schematron=False). + """ + return self._schematron + + @property + def validator_xslt(self): + """ISO-schematron skeleton implementation XSLT validator document (None + if object has been initialized with store_xslt=False). + """ + return self._validator_xslt + + @property + def validation_report(self): + """ISO-schematron validation result report (None if result-storing has + been turned off). + """ + return self._validation_report diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/rng/iso-schematron.rng b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/rng/iso-schematron.rng new file mode 100644 index 00000000..a4f504af --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/rng/iso-schematron.rng @@ -0,0 +1,709 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- Copyright © ISO/IEC 2015 --> +<!-- + The following permission notice and disclaimer shall be included in all + copies of this XML schema ("the Schema"), and derivations of the Schema: + + Permission is hereby granted, free of charge in perpetuity, to any + person obtaining a copy of the Schema, to use, copy, modify, merge and + distribute free of charge, copies of the Schema for the purposes of + developing, implementing, installing and using software based on the + Schema, and to permit persons to whom the Schema is furnished to do so, + subject to the following conditions: + + THE SCHEMA IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SCHEMA OR THE USE OR + OTHER DEALINGS IN THE SCHEMA. + + In addition, any modified copy of the Schema shall include the following + notice: + + "THIS SCHEMA HAS BEEN MODIFIED FROM THE SCHEMA DEFINED IN ISO/IEC 19757-3, + AND SHOULD NOT BE INTERPRETED AS COMPLYING WITH THAT STANDARD". +--> +<grammar ns="http://purl.oclc.org/dsdl/schematron" xmlns="http://relaxng.org/ns/structure/1.0" datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes"> + <start> + <ref name="schema"/> + </start> + <!-- Element declarations --> + <define name="schema"> + <element name="schema"> + <optional> + <attribute name="id"> + <data type="ID"/> + </attribute> + </optional> + <ref name="rich"/> + <optional> + <attribute name="schemaVersion"> + <ref name="non-empty-string"/> + </attribute> + </optional> + <optional> + <attribute name="defaultPhase"> + <data type="IDREF"/> + </attribute> + </optional> + <optional> + <attribute name="queryBinding"> + <ref name="non-empty-string"/> + </attribute> + </optional> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <ref name="inclusion"/> + </zeroOrMore> + <group> + <optional> + <ref name="title"/> + </optional> + <zeroOrMore> + <ref name="ns"/> + </zeroOrMore> + <zeroOrMore> + <ref name="p"/> + </zeroOrMore> + <zeroOrMore> + <ref name="let"/> + </zeroOrMore> + <zeroOrMore> + <ref name="phase"/> + </zeroOrMore> + <oneOrMore> + <ref name="pattern"/> + </oneOrMore> + <zeroOrMore> + <ref name="p"/> + </zeroOrMore> + <optional> + <ref name="diagnostics"/> + </optional> + <optional> + <!-- edited (lxml): required in standard, optional here (since it can be empty anyway) --> + <ref name="properties"/> + </optional> + </group> + </interleave> + </element> + </define> + <define name="active"> + <element name="active"> + <attribute name="pattern"> + <data type="IDREF"/> + </attribute> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <choice> + <text/> + <ref name="dir"/> + <ref name="emph"/> + <ref name="span"/> + </choice> + </zeroOrMore> + </interleave> + </element> + </define> + <define name="assert"> + <element name="assert"> + <attribute name="test"> + <ref name="exprValue"/> + </attribute> + <optional> + <attribute name="flag"> + <ref name="flagValue"/> + </attribute> + </optional> + <optional> + <attribute name="id"> + <data type="ID"/> + </attribute> + </optional> + <optional> + <attribute name="diagnostics"> + <data type="IDREFS"/> + </attribute> + </optional> + <optional> + <attribute name="properties"> + <data type="IDREFS"/> + </attribute> + </optional> + <ref name="rich"/> + <ref name="linkable"/> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <choice> + <text/> + <ref name="name"/> + <ref name="value-of"/> + <ref name="emph"/> + <ref name="dir"/> + <ref name="span"/> + </choice> + </zeroOrMore> + </interleave> + </element> + </define> + <define name="diagnostic"> + <element name="diagnostic"> + <attribute name="id"> + <data type="ID"/> + </attribute> + <ref name="rich"/> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <choice> + <text/> + <ref name="value-of"/> + <ref name="emph"/> + <ref name="dir"/> + <ref name="span"/> + </choice> + </zeroOrMore> + </interleave> + </element> + </define> + <define name="diagnostics"> + <element name="diagnostics"> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <ref name="inclusion"/> + </zeroOrMore> + <zeroOrMore> + <ref name="diagnostic"/> + </zeroOrMore> + </interleave> + </element> + </define> + <define name="dir"> + <element name="dir"> + <optional> + <attribute name="value"> + <choice> + <value>ltr</value> + <value>rtl</value> + </choice> + </attribute> + </optional> + <interleave> + <ref name="foreign"/> + <text/> + </interleave> + </element> + </define> + <define name="emph"> + <element name="emph"> + <text/> + </element> + </define> + <define name="extends"> + <element name="extends"> + <choice> + <attribute name="rule"> + <data type="IDREF"/> + </attribute> + <attribute name="href"> + <ref name="uriValue"/> + </attribute> + </choice> + <ref name="foreign-empty"/> + </element> + </define> + <define name="let"> + <element name="let"> + <attribute name="name"> + <ref name="nameValue"/> + </attribute> + <choice> + <attribute name="value"> + <data type="string" datatypeLibrary=""/> + </attribute> + <oneOrMore> + <ref name="foreign-element"/> + </oneOrMore> + </choice> + </element> + </define> + <define name="name"> + <element name="name"> + <optional> + <attribute name="path"> + <ref name="pathValue"/> + </attribute> + </optional> + <ref name="foreign-empty"/> + </element> + </define> + <define name="ns"> + <element name="ns"> + <attribute name="uri"> + <ref name="uriValue"/> + </attribute> + <attribute name="prefix"> + <ref name="nameValue"/> + </attribute> + <ref name="foreign-empty"/> + </element> + </define> + <define name="p"> + <element name="p"> + <optional> + <attribute name="id"> + <data type="ID"/> + </attribute> + </optional> + <optional> + <attribute name="class"> + <ref name="classValue"/> + </attribute> + </optional> + <optional> + <attribute name="icon"> + <ref name="uriValue"/> + </attribute> + </optional> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <choice> + <text/> + <ref name="dir"/> + <ref name="emph"/> + <ref name="span"/> + </choice> + </zeroOrMore> + </interleave> + </element> + </define> + <define name="param"> + <element name="param"> + <attribute name="name"> + <ref name="nameValue"/> + </attribute> + <attribute name="value"> + <ref name="non-empty-string"/> + </attribute> + </element> + </define> + <define name="pattern"> + <element name="pattern"> + <optional> + <attribute name="documents"> + <ref name="pathValue"/> + </attribute> + </optional> + <ref name="rich"/> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <ref name="inclusion"/> + </zeroOrMore> + <choice> + <group> + <attribute name="abstract"> + <value>true</value> + </attribute> + <attribute name="id"> + <data type="ID"/> + </attribute> + <optional> + <ref name="title"/> + </optional> + <group> + <zeroOrMore> + <ref name="p"/> + </zeroOrMore> + <zeroOrMore> + <ref name="let"/> + </zeroOrMore> + <zeroOrMore> + <ref name="rule"/> + </zeroOrMore> + </group> + </group> + <group> + <optional> + <attribute name="abstract"> + <value>false</value> + </attribute> + </optional> + <optional> + <attribute name="id"> + <data type="ID"/> + </attribute> + </optional> + <optional> + <ref name="title"/> + </optional> + <group> + <zeroOrMore> + <ref name="p"/> + </zeroOrMore> + <zeroOrMore> + <ref name="let"/> + </zeroOrMore> + <zeroOrMore> + <ref name="rule"/> + </zeroOrMore> + </group> + </group> + <group> + <optional> + <attribute name="abstract"> + <value>false</value> + </attribute> + </optional> + <attribute name="is-a"> + <data type="IDREF"/> + </attribute> + <optional> + <attribute name="id"> + <data type="ID"/> + </attribute> + </optional> + <optional> + <ref name="title"/> + </optional> + <group> + <zeroOrMore> + <ref name="p"/> + </zeroOrMore> + <zeroOrMore> + <ref name="param"/> + </zeroOrMore> + </group> + </group> + </choice> + </interleave> + </element> + </define> + <define name="phase"> + <element name="phase"> + <attribute name="id"> + <data type="ID"/> + </attribute> + <ref name="rich"/> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <ref name="inclusion"/> + </zeroOrMore> + <group> + <zeroOrMore> + <ref name="p"/> + </zeroOrMore> + <zeroOrMore> + <ref name="let"/> + </zeroOrMore> + <zeroOrMore> + <ref name="active"/> + </zeroOrMore> + </group> + </interleave> + </element> + </define> + <define name="properties"> + <element name="properties"> + <zeroOrMore> + <ref name="property"/> + </zeroOrMore> + </element> + </define> + <define name="property"> + <element name="property"> + <attribute name="id"> + <data type="ID"/> + </attribute> + <optional> + <attribute name="role"> + <ref name="roleValue"/> + </attribute> + </optional> + <optional> + <attribute name="scheme"/> + </optional> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <choice> + <text/> + <ref name="name"/> + <ref name="value-of"/> + <ref name="emph"/> + <ref name="dir"/> + <ref name="span"/> + </choice> + </zeroOrMore> + </interleave> + </element> + </define> + <define name="report"> + <element name="report"> + <attribute name="test"> + <ref name="exprValue"/> + </attribute> + <optional> + <attribute name="flag"> + <ref name="flagValue"/> + </attribute> + </optional> + <optional> + <attribute name="id"> + <data type="ID"/> + </attribute> + </optional> + <optional> + <attribute name="diagnostics"> + <data type="IDREFS"/> + </attribute> + </optional> + <optional> + <attribute name="properties"> + <data type="IDREFS"/> + </attribute> + </optional> + <ref name="rich"/> + <ref name="linkable"/> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <choice> + <text/> + <ref name="name"/> + <ref name="value-of"/> + <ref name="emph"/> + <ref name="dir"/> + <ref name="span"/> + </choice> + </zeroOrMore> + </interleave> + </element> + </define> + <define name="rule"> + <element name="rule"> + <optional> + <attribute name="flag"> + <ref name="flagValue"/> + </attribute> + </optional> + <ref name="rich"/> + <ref name="linkable"/> + <interleave> + <ref name="foreign"/> + <zeroOrMore> + <ref name="inclusion"/> + </zeroOrMore> + <choice> + <group> + <attribute name="abstract"> + <value>true</value> + </attribute> + <attribute name="id"> + <data type="ID"/> + </attribute> + <zeroOrMore> + <ref name="let"/> + </zeroOrMore> + <oneOrMore> + <choice> + <ref name="assert"/> + <ref name="report"/> + <ref name="extends"/> + <ref name="p"/> + </choice> + </oneOrMore> + </group> + <group> + <attribute name="context"> + <ref name="pathValue"/> + </attribute> + <optional> + <attribute name="id"> + <data type="ID"/> + </attribute> + </optional> + <optional> + <attribute name="abstract"> + <value>false</value> + </attribute> + </optional> + <zeroOrMore> + <ref name="let"/> + </zeroOrMore> + <oneOrMore> + <choice> + <ref name="assert"/> + <ref name="report"/> + <ref name="extends"/> + <ref name="p"/> + </choice> + </oneOrMore> + </group> + </choice> + </interleave> + </element> + </define> + <define name="span"> + <element name="span"> + <attribute name="class"> + <ref name="classValue"/> + </attribute> + <interleave> + <ref name="foreign"/> + <text/> + </interleave> + </element> + </define> + <define name="title"> + <element name="title"> + <zeroOrMore> + <choice> + <text/> + <ref name="dir"/> + </choice> + </zeroOrMore> + </element> + </define> + <define name="value-of"> + <element name="value-of"> + <attribute name="select"> + <ref name="pathValue"/> + </attribute> + <ref name="foreign-empty"/> + </element> + </define> + <!-- common declarations --> + <define name="inclusion"> + <element name="include"> + <attribute name="href"> + <ref name="uriValue"/> + </attribute> + <ref name="foreign-empty"/> + </element> + </define> + <define name="rich"> + <optional> + <attribute name="icon"> + <ref name="uriValue"/> + </attribute> + </optional> + <optional> + <attribute name="see"> + <ref name="uriValue"/> + </attribute> + </optional> + <optional> + <attribute name="fpi"> + <ref name="fpiValue"/> + </attribute> + </optional> + <optional> + <attribute name="xml:lang"> + <ref name="langValue"/> + </attribute> + </optional> + <optional> + <attribute name="xml:space"> + <choice> + <value>preserve</value> + <value>default</value> + </choice> + </attribute> + </optional> + </define> + <define name="linkable"> + <optional> + <attribute name="role"> + <ref name="roleValue"/> + </attribute> + </optional> + <optional> + <attribute name="subject"> + <ref name="pathValue"/> + </attribute> + </optional> + </define> + <define name="foreign"> + <ref name="foreign-attributes"/> + <zeroOrMore> + <ref name="foreign-element"/> + </zeroOrMore> + </define> + <define name="foreign-empty"> + <ref name="foreign-attributes"/> + </define> + <define name="foreign-attributes"> + <zeroOrMore> + <attribute> + <anyName> + <except> + <nsName ns=""/> + <nsName ns="http://www.w3.org/XML/1998/namespace"/> + </except> + </anyName> + </attribute> + </zeroOrMore> + </define> + <define name="foreign-element"> + <element> + <anyName> + <except> + <nsName/> + </except> + </anyName> + <zeroOrMore> + <choice> + <attribute> + <anyName/> + </attribute> + <ref name="foreign-element"/> + <ref name="schema"/> + <text/> + </choice> + </zeroOrMore> + </element> + </define> + <!-- Data types --> + <define name="uriValue"> + <data type="anyURI"/> + </define> + <define name="pathValue"> + <data type="string" datatypeLibrary=""/> + </define> + <define name="exprValue"> + <data type="string" datatypeLibrary=""/> + </define> + <define name="fpiValue"> + <data type="string" datatypeLibrary=""/> + </define> + <define name="langValue"> + <data type="language"/> + </define> + <define name="roleValue"> + <data type="string" datatypeLibrary=""/> + </define> + <define name="flagValue"> + <data type="string" datatypeLibrary=""/> + </define> + <define name="nameValue"> + <data type="string" datatypeLibrary=""/> + </define> + <!-- In the default query language binding, xsd:NCNAME --> + <define name="classValue"> + <data type="string" datatypeLibrary=""/> + </define> + <define name="non-empty-string"> + <data type="token"> + <param name="minLength">1</param> + </data> + </define> +</grammar> diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl new file mode 100644 index 00000000..21a5d2a0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl @@ -0,0 +1,75 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Stylesheet for extracting Schematron information from a RELAX-NG schema. + Based on the stylesheet for extracting Schematron information from W3C XML Schema. + Created by Eddie Robertsson 2002/06/01 + 2009/12/10 hj: changed Schematron namespace to ISO URI (Holger Joukl) +--> +<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" +xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:rng="http://relaxng.org/ns/structure/1.0"> + <!-- Set the output to be XML with an XML declaration and use indentation --> + <xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/> + <!-- --> + <!-- match schema and call recursive template to extract included schemas --> + <!-- --> + <xsl:template match="/rng:grammar | /rng:element"> + <!-- call the schema definition template ... --> + <xsl:call-template name="gatherSchema"> + <!-- ... with current node as the $schemas parameter ... --> + <xsl:with-param name="schemas" select="."/> + <!-- ... and any includes in the $include parameter --> + <xsl:with-param name="includes" select="document(/rng:grammar/rng:include/@href +| //rng:externalRef/@href)"/> + </xsl:call-template> + </xsl:template> + <!-- --> + <!-- gather all included schemas into a single parameter variable --> + <!-- --> + <xsl:template name="gatherSchema"> + <xsl:param name="schemas"/> + <xsl:param name="includes"/> + <xsl:choose> + <xsl:when test="count($schemas) < count($schemas | $includes)"> + <!-- when $includes includes something new, recurse ... --> + <xsl:call-template name="gatherSchema"> + <!-- ... with current $includes added to the $schemas parameter ... --> + <xsl:with-param name="schemas" select="$schemas | $includes"/> + <!-- ... and any *new* includes in the $include parameter --> + <xsl:with-param name="includes" select="document($includes/rng:grammar/rng:include/@href +| $includes//rng:externalRef/@href)"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <!-- we have the complete set of included schemas, so now let's output the embedded schematron --> + <xsl:call-template name="output"> + <xsl:with-param name="schemas" select="$schemas"/> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + <!-- --> + <!-- output the schematron information --> + <!-- --> + <xsl:template name="output"> + <xsl:param name="schemas"/> + <!-- --> + <sch:schema> + <!-- get header-type elements - eg title and especially ns --> + <!-- title (just one) --> + <xsl:copy-of select="$schemas//sch:title[1]"/> + <!-- get remaining schematron schema children --> + <!-- get non-blank namespace elements, dropping duplicates --> + <xsl:for-each select="$schemas//sch:ns"> + <xsl:if test="generate-id(.) = generate-id($schemas//sch:ns[@prefix = current()/@prefix][1])"> + <xsl:copy-of select="."/> + </xsl:if> + </xsl:for-each> + <xsl:copy-of select="$schemas//sch:phase"/> + <xsl:copy-of select="$schemas//sch:pattern"/> + <sch:diagnostics> + <xsl:copy-of select="$schemas//sch:diagnostics/*"/> + </sch:diagnostics> + </sch:schema> + </xsl:template> + <!-- --> +</xsl:transform> diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl new file mode 100644 index 00000000..de0c9ea7 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl @@ -0,0 +1,77 @@ +<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ based on an original transform by Eddie Robertsson
+ 2001/04/21 fn: added support for included schemas
+ 2001/06/27 er: changed XMl Schema prefix from xsd: to xs: and changed to the Rec namespace
+ 2009/12/10 hj: changed Schematron namespace to ISO URI (Holger Joukl)
+-->
+<xsl:transform version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+xmlns:sch="http://purl.oclc.org/dsdl/schematron" xmlns:xs="http://www.w3.org/2001/XMLSchema">
+ <!-- Set the output to be XML with an XML declaration and use indentation -->
+ <xsl:output method="xml" omit-xml-declaration="no" indent="yes" standalone="yes"/>
+ <!-- -->
+ <!-- match schema and call recursive template to extract included schemas -->
+ <!-- -->
+ <xsl:template match="xs:schema">
+ <!-- call the schema definition template ... -->
+ <xsl:call-template name="gatherSchema">
+ <!-- ... with current current root as the $schemas parameter ... -->
+ <xsl:with-param name="schemas" select="/"/>
+ <!-- ... and any includes in the $include parameter -->
+ <xsl:with-param name="includes"
+ select="document(/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
+ </xsl:call-template>
+ </xsl:template>
+ <!-- -->
+ <!-- gather all included schemas into a single parameter variable -->
+ <!-- -->
+ <xsl:template name="gatherSchema">
+ <xsl:param name="schemas"/>
+ <xsl:param name="includes"/>
+ <xsl:choose>
+ <xsl:when test="count($schemas) < count($schemas | $includes)">
+ <!-- when $includes includes something new, recurse ... -->
+ <xsl:call-template name="gatherSchema">
+ <!-- ... with current $includes added to the $schemas parameter ... -->
+ <xsl:with-param name="schemas" select="$schemas | $includes"/>
+ <!-- ... and any *new* includes in the $include parameter -->
+ <xsl:with-param name="includes"
+ select="document($includes/xs:schema/xs:*[self::xs:include or self::xs:import or self::xs:redefine]/@schemaLocation)"/>
+ </xsl:call-template>
+ </xsl:when>
+ <xsl:otherwise>
+ <!-- we have the complete set of included schemas,
+ so now let's output the embedded schematron -->
+ <xsl:call-template name="output">
+ <xsl:with-param name="schemas" select="$schemas"/>
+ </xsl:call-template>
+ </xsl:otherwise>
+ </xsl:choose>
+ </xsl:template>
+ <!-- -->
+ <!-- output the schematron information -->
+ <!-- -->
+ <xsl:template name="output">
+ <xsl:param name="schemas"/>
+ <!-- -->
+ <sch:schema>
+ <!-- get header-type elements - eg title and especially ns -->
+ <!-- title (just one) -->
+ <xsl:copy-of select="$schemas//xs:appinfo/sch:title[1]"/>
+ <!-- get remaining schematron schema children -->
+ <!-- get non-blank namespace elements, dropping duplicates -->
+ <xsl:for-each select="$schemas//xs:appinfo/sch:ns">
+ <xsl:if test="generate-id(.) =
+ generate-id($schemas//xs:appinfo/sch:ns[@prefix = current()/@prefix][1])">
+ <xsl:copy-of select="."/>
+ </xsl:if>
+ </xsl:for-each>
+ <xsl:copy-of select="$schemas//xs:appinfo/sch:phase"/>
+ <xsl:copy-of select="$schemas//xs:appinfo/sch:pattern"/>
+ <sch:diagnostics>
+ <xsl:copy-of select="$schemas//xs:appinfo/sch:diagnostics/*"/>
+ </sch:diagnostics>
+ </sch:schema>
+ </xsl:template>
+ <!-- -->
+</xsl:transform>
diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl new file mode 100644 index 00000000..50183952 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl @@ -0,0 +1,313 @@ +<?xml version="1.0" encoding="UTF-8"?><?xar XSLT?> + +<!-- + OVERVIEW - iso_abstract_expand.xsl + + This is a preprocessor for ISO Schematron, which implements abstract patterns. + It also + * extracts a particular schema using an ID, where there are multiple + schemas, such as when they are embedded in the same NVDL script + * allows parameter substitution inside @context, @test, @select, @path + * experimentally, allows parameter recognition and substitution inside + text (NOTE: to be removed, for compataibility with other implementations, + please do not use this) + + This should be used after iso-dsdl-include.xsl and before the skeleton or + meta-stylesheet (e.g. iso-svrl.xsl) . It only requires XSLT 1. + + Each kind of inclusion can be turned off (or on) on the command line. + +--> + +<!-- +Open Source Initiative OSI - The MIT License:Licensing +[OSI Approved License] + +This source code was previously available under the zlib/libpng license. +Attribution is polite. + +The MIT License + +Copyright (c) 2004-2010 Rick Jellife and Academia Sinica Computing Centre, Taiwan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +--> + +<!-- +VERSION INFORMATION + 2013-09-19 RJ + * Allow macro expansion in @path attributes, eg. for sch:name/@path + + 2010-07-10 RJ + * Move to MIT license + + 2008-09-18 RJ + * move out param test from iso:schema template to work with XSLT 1. (Noah Fontes) + + 2008-07-29 RJ + * Create. Pull out as distinct XSL in its own namespace from old iso_pre_pro.xsl + * Put everything in private namespace + * Rewrite replace_substring named template so that copyright is clear + + 2008-07-24 RJ + * correct abstract patterns so for correct names: param/@name and + param/@value + + 2007-01-12 RJ + * Use ISO namespace + * Use pattern/@id not pattern/@name + * Add Oliver Becker's suggests from old Schematron-love-in list for <copy> + * Add XT -ism? + 2003 RJ + * Original written for old namespace + * http://www.topologi.com/resources/iso-pre-pro.xsl +--> +<xslt:stylesheet version="1.0" xmlns:xslt="http://www.w3.org/1999/XSL/Transform" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:iso="http://purl.oclc.org/dsdl/schematron" + xmlns:nvdl="http://purl.oclc.org/dsdl/nvdl" + + xmlns:iae="http://www.schematron.com/namespace/iae" + + > + + <xslt:param name="schema-id"></xslt:param> + + + <!-- Driver for the mode --> + <xsl:template match="/"> + <xsl:apply-templates select="." mode="iae:go" /> + </xsl:template> + + + <!-- ================================================================================== --> + <!-- Normal processing rules --> + <!-- ================================================================================== --> + <!-- Output only the selected schema --> + <xslt:template match="iso:schema" > + <xsl:if test="string-length($schema-id) =0 or @id= $schema-id "> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="iae:go" /> + </xslt:copy> + </xsl:if> + </xslt:template> + + + <!-- Strip out any foreign elements above the Schematron schema . + --> + <xslt:template match="*[not(ancestor-or-self::iso:*)]" mode="iae:go" > + <xslt:apply-templates mode="iae:go" /> + </xslt:template> + + + <!-- ================================================================================== --> + <!-- Handle Schematron abstract pattern preprocessing --> + <!-- abstract-to-real calls + do-pattern calls + macro-expand calls + multi-macro-expand + replace-substring --> + <!-- ================================================================================== --> + + <!-- + Abstract patterns allow you to say, for example + + <pattern name="htmlTable" is-a="table"> + <param name="row" value="html:tr"/> + <param name="cell" value="html:td" /> + <param name="table" value="html:table" /> + </pattern> + + For a good introduction, see Uche Ogbujii's article for IBM DeveloperWorks + "Discover the flexibility of Schematron abstract patterns" + http://www-128.ibm.com/developerworks/xml/library/x-stron.html + However, note that ISO Schematron uses @name and @value attributes on + the iso:param element, and @id not @name on the pattern element. + + --> + + <!-- Suppress declarations of abstract patterns --> + <xslt:template match="iso:pattern[@abstract='true']" mode="iae:go" > + <xslt:comment>Suppressed abstract pattern <xslt:value-of select="@id"/> was here</xslt:comment> + </xslt:template> + + + <!-- Suppress uses of abstract patterns --> + <xslt:template match="iso:pattern[@is-a]" mode="iae:go" > + + <xslt:comment>Start pattern based on abstract <xslt:value-of select="@is-a"/></xslt:comment> + + <xslt:call-template name="iae:abstract-to-real" > + <xslt:with-param name="caller" select="@id" /> + <xslt:with-param name="is-a" select="@is-a" /> + </xslt:call-template> + + </xslt:template> + + + + <!-- output everything else unchanged --> + <xslt:template match="*" priority="-1" mode="iae:go" > + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="iae:go"/> + </xslt:copy> + </xslt:template> + + <!-- Templates for macro expansion of abstract patterns --> + <!-- Sets up the initial conditions for the recursive call --> + <xslt:template name="iae:macro-expand"> + <xslt:param name="caller"/> + <xslt:param name="text" /> + <xslt:call-template name="iae:multi-macro-expand"> + <xslt:with-param name="caller" select="$caller"/> + <xslt:with-param name="text" select="$text"/> + <xslt:with-param name="paramNumber" select="1"/> + </xslt:call-template> + + </xslt:template> + + <!-- Template to replace the current parameter and then + recurse to replace subsequent parameters. --> + + <xslt:template name="iae:multi-macro-expand"> + <xslt:param name="caller"/> + <xslt:param name="text" /> + <xslt:param name="paramNumber" /> + + + <xslt:choose> + <xslt:when test="//iso:pattern[@id=$caller]/iso:param[ $paramNumber]"> + + <xslt:call-template name="iae:multi-macro-expand"> + <xslt:with-param name="caller" select="$caller"/> + <xslt:with-param name="paramNumber" select="$paramNumber + 1"/> + <xslt:with-param name="text" > + <xslt:call-template name="iae:replace-substring"> + <xslt:with-param name="original" select="$text"/> + <xslt:with-param name="substring" + select="concat('$', //iso:pattern[@id=$caller]/iso:param[ $paramNumber ]/@name)"/> + <xslt:with-param name="replacement" + select="//iso:pattern[@id=$caller]/iso:param[ $paramNumber ]/@value"/> + </xslt:call-template> + </xslt:with-param> + </xslt:call-template> + </xslt:when> + <xslt:otherwise><xslt:value-of select="$text" /></xslt:otherwise> + + </xslt:choose> + </xslt:template> + + + <!-- generate the real pattern from an abstract pattern + parameters--> + <xslt:template name="iae:abstract-to-real" > + <xslt:param name="caller"/> + <xslt:param name="is-a" /> + <xslt:for-each select="//iso:pattern[@id= $is-a]"> + <xslt:copy> + + <xslt:choose> + <xslt:when test=" string-length( $caller ) = 0"> + <xslt:attribute name="id"><xslt:value-of select="concat( generate-id(.) , $is-a)" /></xslt:attribute> + </xslt:when> + <xslt:otherwise> + <xslt:attribute name="id"><xslt:value-of select="$caller" /></xslt:attribute> + </xslt:otherwise> + </xslt:choose> + + <xslt:apply-templates select="*|text()" mode="iae:do-pattern" > + <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param> + </xslt:apply-templates> + + </xslt:copy> + </xslt:for-each> + </xslt:template> + + + <!-- Generate a non-abstract pattern --> + <xslt:template mode="iae:do-pattern" match="*"> + <xslt:param name="caller"/> + <xslt:copy> + <xslt:for-each select="@*[name()='test' or name()='context' or name()='select' or name()='path' ]"> + <xslt:attribute name="{name()}"> + <xslt:call-template name="iae:macro-expand"> + <xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param> + <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param> + </xslt:call-template> + </xslt:attribute> + </xslt:for-each> + <xslt:copy-of select="@*[name()!='test'][name()!='context'][name()!='select'][name()!='path']" /> + <xsl:for-each select="node()"> + <xsl:choose> + <!-- Experiment: replace macros in text as well, to allow parameterized assertions + and so on, without having to have spurious <iso:value-of> calls and multiple + delimiting. + NOTE: THIS FUNCTIONALITY WILL BE REMOVED IN THE FUTURE --> + <xsl:when test="self::text()"> + <xslt:call-template name="iae:macro-expand"> + <xslt:with-param name="text"><xslt:value-of select="."/></xslt:with-param> + <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param> + </xslt:call-template> + </xsl:when> + <xsl:otherwise> + <xslt:apply-templates select="." mode="iae:do-pattern"> + <xslt:with-param name="caller"><xslt:value-of select="$caller"/></xslt:with-param> + </xslt:apply-templates> + </xsl:otherwise> + </xsl:choose> + </xsl:for-each> + </xslt:copy> + </xslt:template> + + <!-- UTILITIES --> + <!-- Simple version of replace-substring function --> + <xslt:template name="iae:replace-substring"> + <xslt:param name="original" /> + <xslt:param name="substring" /> + <xslt:param name="replacement" select="''"/> + + <xsl:choose> + <xsl:when test="not($original)" /> + <xsl:when test="not(string($substring))"> + <xsl:value-of select="$original" /> + </xsl:when> + <xsl:when test="contains($original, $substring)"> + <xsl:variable name="before" select="substring-before($original, $substring)" /> + <xsl:variable name="after" select="substring-after($original, $substring)" /> + + <xsl:value-of select="$before" /> + <xsl:value-of select="$replacement" /> + <!-- recursion --> + <xsl:call-template name="iae:replace-substring"> + <xsl:with-param name="original" select="$after" /> + <xsl:with-param name="substring" select="$substring" /> + <xsl:with-param name="replacement" select="$replacement" /> + </xsl:call-template> + </xsl:when> + <xsl:otherwise> + <!-- no substitution --> + <xsl:value-of select="$original" /> + </xsl:otherwise> + </xsl:choose> +</xslt:template> + + + +</xslt:stylesheet>
\ No newline at end of file diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl new file mode 100644 index 00000000..44e5573b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl @@ -0,0 +1,1160 @@ +<?xml version="1.0" encoding="UTF-8"?><?xar XSLT?> + +<!-- + OVERVIEW : iso_dsdl_include.xsl + + This is an inclusion preprocessor for the non-smart text inclusions + of ISO DSDL. It handles + <relax:extRef> for ISO RELAX NG + <sch:include> for ISO Schematron and Schematron 1.n + <sch:extends> for 2009 draft ISO Schematron + <xi:xinclude> simple W3C XIncludes for ISO NVRL and DSRL + <crdl:ref> for draft ISO CRDL + <dtll:include> for draft ISO DTLL + <* @xlink:href> for simple W3C XLink 1.1 embedded links + + + This should be the first in any chain of processing. It only requires + XSLT 1. Each kind of inclusion can be turned off (or on) on the command line. + + Ids in fragment identifiers or xpointers will be sought in the following + order: + * @xml:id + * id() for typed schemas (e.g. from DTD) [NOTE: XInclude does not support this] + * untyped @id + + The proposed behaviour for the update to ISO Schematron has been implemented. If an + include points to an element with the same name as the parent, then that element's + contents will be included. This supports the merge style of inclusion. + + When an inclusion is made, it is preceded by a PI with target DSDL_INCLUDE_START + and the href and closed by a PI with target DSDL_INCLUDE_START and the href. This is + to allow better location of problems, though only to the file level. + + Limitations: + * No rebasing: relative paths will be interpreted based on the initial document's + path, not the including document. (Severe limitation!) + * No checking for circular references + * Not full xpointers: only ID matching + * <relax:include> not implemented + * XInclude handling of xml:base and xml:lang not implemented +--> +<!-- + VERSION INFORMATION + 2009-02-25 + * Update DSDL namespace to use schematron.com + * Tested with SAXON9, Xalan 2.7.1, IE7, + * IE does not like multiple variables in same template with same name: rename. + 2008-09-18 + * Remove new behaviour for include, because it conflicts with existing usage [KH] + * Add extends[@href] element with that merge functionality + * Generate PIs to notate source of inclusions for potential better diagnostics + + 2008-09-16 + * Fix for XSLT1 + + 2008-08-28 + * New behaviour for schematron includes: if the pointed to element is the same as the current, + include the children. + + 2008-08-20 + * Fix bug: in XSLT1 cannot do $document/id('x') but need to use for-each + + 2008-08-04 + * Add support for inclusions in old namespace + + 2008-08-03 + * Fix wrong param name include-relaxng & include-crdl (KH, PH) + * Allow inclusion of XSLT and XHTML (KH) + * Fix inclusion of fragments (KH) + + 2008-07-25 + * Add selectable input parameter + + 2008-07-24 + * RJ New +--> +<!-- + LEGAL INFORMATION + + Copyright (c) 2008 Rick Jelliffe + + This software is provided 'as-is', without any express or implied warranty. + In no event will the authors be held liable for any damages arising from + the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it freely, + subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a product, + an acknowledgment in the product documentation would be appreciated but is + not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. +--> +<xslt:stylesheet version="1.0" + xmlns:xslt="http://www.w3.org/1999/XSL/Transform" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:iso="http://purl.oclc.org/dsdl/schematron" + xmlns:nvdl="http://purl.oclc.org/dsdl/nvdl" + xmlns:xhtml="http://www.w3.org/1999/xhtml" + xmlns:schold="http://www.ascc.net/xml/schematron" + xmlns:crdl="http://purl.oclc.org/dsdl/crepdl/ns/structure/1.0" + xmlns:xi="http://www.w3.org/2001/XInclude" + xmlns:dtll="http://www.jenitennison.com/datatypes" + xmlns:dsdl="http://www.schematron.com/namespace/dsdl" + xmlns:relax="http://relaxng.org/ns/structure/1.0" + xmlns:xlink="http://www.w3.org/1999/xlink"> + <!-- Note: The URL for the dsdl namespace is not official --> + + + <xsl:param name="include-schematron">true</xsl:param> + <xsl:param name="include-crdl">true</xsl:param> + <xsl:param name="include-xinclude">true</xsl:param> + <xsl:param name="include-dtll">true</xsl:param> + <xsl:param name="include-relaxng">true</xsl:param> + <xsl:param name="include-xlink">true</xsl:param> + + <xsl:template match="/"> + <xsl:apply-templates select="." mode="dsdl:go" /> + </xsl:template> + + <!-- output everything else unchanged --> + <xslt:template match="node()" priority="-1" mode="dsdl:go"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xslt:template> + + + + <!-- =========================================================== --> + <!-- ISO/IEC 19757 - DSDL Document Schema Definition Languages --> + <!-- Part 2 - Regular grammar-based validation - RELAX NG --> + <!-- This only implements relax:extRef not relax:include which --> + <!-- is complex. --> + <!-- =========================================================== --> + <xslt:template match="relax:extRef" mode="dsdl:go"> + + + <!-- Insert subschema --> + + <xsl:variable name="document-uri" + select="substring-before(concat(@href,'#'), '#')" /> + <xsl:variable name="fragment-id" + select="substring-after(@href, '#')" /> + + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + <xsl:choose> + <xsl:when test="not( $include-relaxng = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + + <xsl:choose> + + <xsl:when + test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0"> + <xsl:message> + Error: Impossible URL in RELAX NG extRef + include + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when + test="string-length( $document-uri ) = 0"> + <xslt:apply-templates mode="dsdl:go" + select="//*[@xml:id= $fragment-id ] | id( $fragment-id) | //*[@id= $fragment-id ]" /> + </xslt:when> + + <xsl:when + test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" + select="document( $document-uri,/ )" /> + + <xsl:if test="not($theDocument_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <!-- use a for-each so that the id() function works correctly on the external document --> + <xsl:for-each select="$theDocument_1"> + <xsl:variable name="theFragment_1" + select="$theDocument_1//*[@xml:id= $fragment-id ] + | id( $fragment-id) + | $theDocument_1//*[@id= $fragment-id ]" /> + <xsl:if test="not($theFragment_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates + select=" $theFragment_1[1]" mode="dsdl:go" /> + </xsl:for-each> + </xsl:when> + + <xsl:otherwise> + <xsl:variable name="theDocument_2" + select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2/*" /> + <xsl:if test="not($theDocument_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + + <xsl:if test="not($theFragment_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates select="$theFragment_2 " + mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + + </xsl:otherwise> + </xsl:choose> + + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + </xslt:template> + + + + <!-- =========================================================== --> + <!-- ISO/IEC 19757 - DSDL Document Schema Definition Languages --> + <!-- Part 3 - Rule-based validation - Schematron --> + <!-- =========================================================== --> + + + <!-- Extend the URI syntax to allow # references --> + <!-- Add experimental support for simple containers like /xxx:xxx/iso:pattern to allow better includes --> + <xsl:template match="iso:include" mode="dsdl:go"> + + <xsl:variable name="document-uri" + select="substring-before(concat(@href,'#'), '#')" /> + <xsl:variable name="fragment-id" + select="substring-after(@href, '#')" /> + + + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + + <xsl:choose> + <xsl:when test="not( $include-schematron = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + + <xsl:choose> + + <xsl:when + test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0"> + <xsl:message> + Error: Impossible URL in Schematron include + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when + test="string-length( $document-uri ) = 0"> + <xslt:apply-templates mode="dsdl:go" + select="//iso:*[@xml:id= $fragment-id ] + |id( $fragment-id) + | //iso:*[@id= $fragment-id ]" /> + </xslt:when> + + <!-- case where there is a fragment in another document (should be an iso: element) --> + <!-- There are three cases for includes with fragment: + 0) No href file or no matching id - error! + 1) REMOVED + + 2) The linked-to element is sch:schema however the parent of the include + is not a schema. In this case, it is an error. (Actually, it should + be an error for other kinds of containment problems, but we won't + check for them in this version.) + + 3) Otherwise, include the pointed-to element + --> + + <xsl:when + test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" + select="document( $document-uri,/ )" /> + <xsl:variable name="originalParent" select=".." /> + + <!-- case 0 --> + <xsl:if test="not($theDocument_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <!-- use for-each to rebase id() to external document --> + <xsl:for-each select="$theDocument_1"> + <xsl:variable name="theFragment_1" + select=" $theDocument_1//iso:*[@xml:id= $fragment-id ] | + id($fragment-id) | + $theDocument_1//iso:*[@id= $fragment-id ]" /> + + + <xsl:choose> + <!-- case 0 --> + <xsl:when test="not($theFragment_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:when> + + + <!-- case 1 REMOVED --> + + <!-- case 2 --> + <xsl:when + test=" $theFragment_1/self::iso:schema "> + <xsl:message> + Schema error: Use include to + include fragments, not a whole + schema + </xsl:message> + </xsl:when> + + <!-- case 3 --> + <xsl:otherwise> + <xsl:apply-templates + select=" $theFragment_1[1]" mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + </xsl:for-each> + </xsl:when> + + <!-- Case where there is no ID so we include the whole document --> + <!-- Experimental addition: include fragments of children --> + <xsl:otherwise> + <xsl:variable name="theDocument_2" + select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2/iso:*" /> + <xsl:variable name="theContainedFragments" + select="$theDocument_2/*/iso:* | $theDocument_2/*/xsl:* | $theDocument_2/*/xhtml:*" /> + <xsl:if test="not($theDocument_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + + <!-- There are three cases for includes: + 0) No text specified- error! + + 1) REMOVED + + 2) The linked-to element is sch:schema however the parent of the include + is not a schema. In this case, it is an error. (Actually, it should + be an error for other kinds of containment problems, but we won't + check for them in this version.) + + 3) Otherwise, include the pointed-to element + --> + <xsl:choose> + <!-- case 0 --> + <xsl:when + test="not($theFragment_2) and not ($theContainedFragments)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:when> + + <!-- case 1 removed --> + + <!-- case 2 --> + <xsl:when + test=" $theFragment_2/self::iso:schema or $theContainedFragments/self::iso:schema"> + <xsl:message> + Schema error: Use include to include + fragments, not a whole schema + </xsl:message> + </xsl:when> + + <!-- If this were XLST 2, we could use + if ($theFragment) then $theFragment else $theContainedFragments + here (thanks to KN) + --> + <!-- case 3 --> + <xsl:otherwise> + <xsl:apply-templates + select="$theFragment_2 " mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> + + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + </xsl:template> + + + <!-- WARNING sch:extends[@href] is experimental and non standard --> + <!-- Basically, it adds the children of the selected element, not the element itself. --> + <xsl:template match="iso:extends[@href]" mode="dsdl:go"> + + <xsl:variable name="document-uri" + select="substring-before(concat(@href,'#'), '#')" /> + <xsl:variable name="fragment-id" + select="substring-after(@href, '#')" /> + + + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + + <xsl:choose> + <xsl:when test="not( $include-schematron = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + + <xsl:choose> + + <xsl:when + test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0"> + <xsl:message> + Error: Impossible URL in Schematron include + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when + test="string-length( $document-uri ) = 0"> + <xslt:apply-templates mode="dsdl:go" + select="//iso:*[@xml:id= $fragment-id ]/* + |id( $fragment-id)/* + | //iso:*[@id= $fragment-id ]/*" /> + </xslt:when> + + <!-- case where there is a fragment in another document (should be an iso: element) --> + <!-- There are three cases for includes with fragment: + 0) No href file or no matching id - error! + 1) REMOVED + + 2) REMOVED + + 3) Otherwise, include the pointed-to element + --> + + <xsl:when + test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" + select="document( $document-uri,/ )" /> + <xsl:variable name="originalParent" select=".." /> + + <!-- case 0 --> + <xsl:if test="not($theDocument_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <!-- use for-each to rebase id() to external document --> + <xsl:for-each select="$theDocument_1"> + <xsl:variable name="theFragment_1" + select=" $theDocument_1//iso:*[@xml:id= $fragment-id ] | + id($fragment-id) | + $theDocument_1//iso:*[@id= $fragment-id ]" /> + + + <xsl:choose> + <!-- case 0 --> + <xsl:when test="not($theFragment_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:when> + + + <!-- case 1 REMOVED --> + + <!-- case 2 REMOVED --> + + + <!-- case 3 --> + <xsl:otherwise> + + <xsl:apply-templates + select=" $theFragment_1[1]/*" mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + </xsl:for-each> + </xsl:when> + + <!-- Case where there is no ID so we include the whole document --> + <!-- Experimental addition: include fragments of children --> + <xsl:otherwise> + <xsl:variable name="theDocument_2" + select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2/iso:*" /> + <xsl:variable name="theContainedFragments" + select="$theDocument_2/*/iso:* | $theDocument_2/*/xsl:* | $theDocument_2/*/xhtml:*" /> + <xsl:if test="not($theDocument_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + + <!-- There are three cases for includes: + 0) No text specified- error! + + 1) REMOVED + + 2) REMOVED + + 3) Otherwise, include the pointed-to element + --> + <xsl:choose> + <!-- case 0 --> + <xsl:when + test="not($theFragment_2) and not ($theContainedFragments)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:when> + + <!-- case 1 removed --> + + <!-- case 2 removed --> + + <!-- If this were XLST 2, we could use + if ($theFragment) then $theFragment else $theContainedFragments + here (thanks to KN) + --> + <!-- case 3 --> + <xsl:otherwise> + <xsl:apply-templates + select="$theFragment_2/* " mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> + + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + </xsl:template> + + + + <!-- =========================================================== --> + <!-- Handle Schematron 1.6 inclusions: clone of ISO code above --> + <!-- =========================================================== --> + + + <!-- Extend the URI syntax to allow # references --> + <!-- Add experimental support for simple containers like /xxx:xxx/schold:pattern to allow better includes --> + <xsl:template match="schold:include" mode="dsdl:go"> + <xsl:variable name="document-uri" + select="substring-before(concat(@href,'#'), '#')" /> + <xsl:variable name="fragment-id" + select="substring-after(@href, '#')" /> + + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + + <xsl:choose> + <xsl:when test="not( $include-schematron = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + <xsl:choose> + + <xsl:when + test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0"> + <xsl:message> + Error: Impossible URL in Schematron include + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when + test="string-length( $document-uri ) = 0"> + <xslt:apply-templates mode="dsdl:go" + select="//schold:*[@xml:id= $fragment-id ] + |id( $fragment-id) + | //schold:*[@id= $fragment-id ]" /> + </xslt:when> + + <!-- case where there is a fragment in another document (should be an iso: element) --> + <xsl:when + test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" + select="document( $document-uri,/ )" /> + <xsl:if test="not($theDocument_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <!-- use for-each to rebase id() to $theDocument --> + <xsl:for-each select="$theDocument_1"> + <xsl:variable name="theFragment_1" + select=" $theDocument_1//schold:*[@xml:id= $fragment-id ] | + id($fragment-id) | + $theDocument_1//schold:*[@id= $fragment-id ]" /> + <xsl:if + test=" $theFragment_1/self::schold:schema "> + <xsl:message> + Schema error: Use include to include + fragments, not a whole schema + </xsl:message> + </xsl:if> + <xsl:if test="not($theFragment_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates + select=" $theFragment_1[1]" mode="dsdl:go" /> + </xsl:for-each> + </xsl:when> + + <!-- Case where there is no ID so we include the whole document --> + <!-- Experimental addition: include fragments of children --> + <xsl:otherwise> + <xsl:variable name="theDocument_2" + select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2/iso:*" /> + <xsl:variable name="theContainedFragments" + select="$theDocument_2/*/schold:* | $theDocument_2/*/xsl:* | $theDocument_2/*/xhtml:*" /> + <xsl:if test="not($theDocument_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + + <xsl:if + test=" $theFragment_2/self::schold:schema or $theContainedFragments/self::schold:schema"> + <xsl:message> + Schema error: Use include to include + fragments, not a whole schema + </xsl:message> + </xsl:if> + <xsl:if + test="not($theFragment_2) and not ($theContainedFragments)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <!-- If this were XLST 2, we could use + if ($theFragment) then $theFragment else $theContainedFragments + here (thanks to KN) + --> + <xsl:choose> + <xsl:when test=" $theFragment_2 "> + <xsl:apply-templates + select="$theFragment_2 " mode="dsdl:go" /> + </xsl:when> + <xsl:otherwise> + <!-- WARNING! EXPERIMENTAL! Use at your own risk. This may be discontinued! --> + <xsl:apply-templates + select=" $theContainedFragments " mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + </xsl:otherwise> + </xsl:choose> + + </xsl:otherwise> + </xsl:choose> + + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + </xsl:template> + <!-- =========================================================== --> + <!-- ISO/IEC 19757 - DSDL Document Schema Definition Languages --> + <!-- Part 5 - DataType Library Language - DTLL --> + <!-- Committee Draft Experimental support only --> + <!-- The <include> element may well be replaced by XInclude in --> + <!-- any final version. --> + <!-- =========================================================== --> + <xslt:template match="dtll:include" mode="dsdl:go"> + <!-- Insert subschema --> + + <xsl:variable name="document-uri" + select="substring-before(concat(@href,'#'), '#')" /> + <xsl:variable name="fragment-id" + select="substring-after(@href, '#')" /> + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + <xsl:choose> + <xsl:when test="not( $include-dtll = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + <xsl:choose> + + <xsl:when + test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0"> + <xsl:message> + Error: Impossible URL in DTLL include + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when + test="string-length( $document-uri ) = 0"> + <xslt:apply-templates mode="dsdl:go" + select="//*[@xml:id= $fragment-id ] | id( $fragment-id) + | //*[@id= $fragment-id ]" /> + </xslt:when> + + <xsl:when + test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" + select="document( $document-uri,/ )" /> + <xsl:if test="not($theDocument_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <!-- use for-each to rebase id() to $theDocument --> + <xsl:for-each select="$theDocument_1"> + <xsl:variable name="theFragment_1" + select="$theDocument_1//*[@xml:id= $fragment-id ] + | id( $fragment-id ) + | $theDocument_1//*[@id= $fragment-id ]" /> + <xsl:if test="not($theFragment_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates + select=" $theFragment_1[1]" mode="dsdl:go" /> + </xsl:for-each> + </xsl:when> + + <xsl:otherwise> + <xsl:variable name="theDocument_2" + select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2/*" /> + + <xsl:if test="not($theDocument_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + + <xsl:if test="not($theFragment_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates select="$theFragment_2 " + mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + + </xsl:otherwise> + </xsl:choose> + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + </xslt:template> + + <!-- =========================================================== --> + <!-- ISO/IEC 19757 - DSDL Document Schema Definition Languages --> + <!-- Part 7 - Character Repertoire Description Language - CRDL --> + <!-- Final Committee Draft 2008-01-11 Experimental support only --> + <!-- =========================================================== --> + <xslt:template match="crdl:ref" mode="dsdl:go"> + <!-- Insert subschema --> + + <xsl:variable name="document-uri" + select="substring-before(concat(@href,'#'), '#')" /> + <xsl:variable name="fragment-id" + select="substring-after(@href, '#')" /> + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + <xsl:choose> + <xsl:when test="not( $include-crdl = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + <xsl:choose> + + <xsl:when + test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0"> + <xsl:message> + Error: Impossible URL in CRDL include + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when + test="string-length( $document-uri ) = 0"> + + <xslt:apply-templates mode="dsdl:go" + select="//*[@xml:id= $fragment-id ] | id( $fragment-id) + | //*[@id= $fragment-id ]" /> + </xslt:when> + + <xsl:when + test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" + select="document( $document-uri,/ )" /> + <xsl:if test="not($theDocument_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <!-- use for-each to rebase id() to $theDocument --> + <xsl:for-each select="$theDocument_1"> + <xsl:variable name="theFragment_1" + select="$theDocument_1//*[@xml:id= $fragment-id ] + | id( $fragment-id ) + | $theDocument_1//*[@id= $fragment-id ]" /> + + <xsl:if test="not($theFragment_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates select=" $theFragment_1 " + mode="dsdl:go" /> + </xsl:for-each> + </xsl:when> + + <xsl:otherwise> + <xsl:variable name="theDocument_2" + select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2/*" /> + + <xsl:if test="not($theDocument_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + <xsl:if test="not($theFragment_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + + <xsl:apply-templates select="$theFragment_2" + mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + + </xsl:otherwise> + </xsl:choose> + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + </xslt:template> + + + <!-- =========================================================== --> + <!-- ISO/IEC 19757 - DSDL Document Schema Definition Languages --> + <!-- Part 4 - Namespace-based Validation Dispatching Language - NVDL --> + <!-- Note: This does not include schemas referenced for --> + <!-- validation, it merely handles any simple XIncludes --> + <!-- =========================================================== --> + <!-- ISO/IEC 19757 - DSDL Document Schema Definition Languages --> + <!-- Part 8 - Document Schema Renaming Language - DSRL --> + <!-- Note: Final? Committee Draft Experimental support only --> + <!-- =========================================================== --> + <!-- XInclude support for id based references only, with 1 level --> + <!-- of fallback. --> + <!-- =========================================================== --> + + <xslt:template mode="dsdl:go" + match="xi:include[@href][not(@parseType) or @parseType ='xml']"> + <!-- Simple inclusions only here --> + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + <xsl:choose> + <xsl:when test="not( $include-xinclude = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + <xsl:choose> + + <xsl:when test="contains( @href, '#')"> + <xsl:message terminate="yes"> + Fatal error: Xinclude href contains fragment + identifier # + </xsl:message> + </xsl:when> + + + <xsl:when test="contains( @xpointer, '(')"> + <xsl:message terminate="yes"> + Fatal error: Sorry, this software only + supports simple ids in XInclude xpointers + </xsl:message> + </xsl:when> + + <xsl:when + test="string-length( @href ) = 0 and string-length( @xpointer ) = 0"> + + <xsl:message terminate="yes"> + Fatal Error: Impossible URL in XInclude + include + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when test="string-length( @href ) = 0"> + + <xslt:apply-templates mode="dsdl:go" + select="//*[@xml:id= current()/@xpointer ] | id( @xpointer) + | //*[@id= current()/@xpointer ]" /> + </xslt:when> + + <xsl:when + test="string-length( @xpointer ) > 0"> + <xsl:variable name="theDocument_1" + select="document( @href,/ )" /> + <xsl:variable name="theFragment_1" + select="$theDocument_1//*[@xml:id= current()/@xpointer ] + + | $theDocument_1//*[@id= current()/@xpointer ]" /> + <!-- removed + | $theDocument_1/id( @xpointer) + because it requires rebasing in XSLT1 and that would mess up the use of current() + --> + + + <!-- Allow one level of fallback, to another XInclude --> + <xsl:if test="not($theDocument_1)"> + <xsl:choose> + <xsl:when test="xi:fallback"> + <xsl:variable name="theDocument_2" + select="document( xi:fallback[1]/xi:include[not(@parseType) + or @parseType='xml']/@href,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2//*[@xml:id= current()/xi:fallback[1]/xi:include/@xpointer ] + | $theDocument_2//*[@id= current()/xi:fallback[1]/xi:include/@xpointer ]" /> + <!-- removed + | $theDocument_2/id( xi:fallback[1]/xi:include/@xpointer) + because it id() would need rebasing in XSLT1 and that would mess up use of current() + --> + + <xsl:if + test="not($theDocument_2)"> + + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file and fallback + file: </xsl:text> + <xsl:value-of + select="@href" /> + </xsl:message> + </xsl:if> + </xsl:when> + <xsl:otherwise> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:otherwise> + </xsl:choose> + </xsl:if> + <xsl:apply-templates select=" $theFragment_1" + mode="dsdl:go" /> + </xsl:when> + + <!-- Document but no fragment specified --> + <xsl:otherwise> + <xsl:variable name="theDocument_3" + select="document( @href,/ )" /> + <xsl:variable name="theFragment_3" + select="$theDocument_3/*" /> + + <xsl:if test="not($theDocument_3)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@href" /> + </xsl:message> + </xsl:if> + + <xsl:apply-templates select="$theFragment_3 " + mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + + </xsl:otherwise> + </xsl:choose> + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@href" /> + </xsl:processing-instruction> + </xslt:template> + + <!-- =========================================================== --> + <!-- W3C XLink 1.1 embedded simple links --> + <!-- =========================================================== --> + <xslt:template + match="*[@xlink:href][not(parent::*[@xlink:type='complex'])] + [not(@xlink:type) or (@xlink:type='simple')] + [@xlink:show='embed'] + [not(@xlink:actuate) or (@xlink:actuate='onLoad')]" + mode="dsdl:go" priority="1"> + + <xsl:variable name="document-uri" + select="substring-before(concat(@xlink:href,'#'), '#')" /> + <xsl:variable name="fragment-id" + select="substring-after(@xlink:href, '#')" /> + <xsl:processing-instruction name="DSDL_INCLUDE_START"> + <xsl:value-of select="@xlink:href" /> + </xsl:processing-instruction> + <xsl:choose> + <xsl:when test="not( $include-xlink = 'true' )"> + <xslt:copy> + <xslt:copy-of select="@*" /> + <xslt:apply-templates mode="dsdl:go" /> + </xslt:copy> + </xsl:when> + <xsl:otherwise> + <xsl:choose> + + <xsl:when + test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0"> + <xsl:message> + Error: Impossible URL in XLink embedding + link + </xsl:message> + </xsl:when> + + <!-- this case is when there is in embedded schema in the same document elsewhere --> + <xslt:when + test="string-length( $document-uri ) = 0"> + <xslt:apply-templates mode="dsdl:go" + select="//*[@xml:id= $fragment-id ] | id( $fragment-id) + | //*[@id= $fragment-id ]" /> + </xslt:when> + + <xsl:when + test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" + select="document( $document-uri,/ )" /> + <xsl:if test="not($theDocument_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@xlink:href" /> + </xsl:message> + </xsl:if> + <!-- use for-each to rebase id() to $theDocument --> + <xsl:for-each select="$theDocument_1"> + <xsl:variable name="theFragment_1" + select="$theDocument_1//*[@xml:id= $fragment-id ] + | id( $fragment-id ) + | $theDocument_1//*[@id= $fragment-id ]" /> + <xsl:if test="not($theFragment_1)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@xlink:href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates + select=" $theFragment_1[1]" mode="dsdl:go" /> + </xsl:for-each> + </xsl:when> + + <xsl:otherwise> + <xsl:variable name="theDocument_2" + select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" + select="$theDocument_2/*" /> + + <xsl:if test="not($theDocument_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to open referenced included file: </xsl:text> + <xsl:value-of select="@xlink:href" /> + </xsl:message> + </xsl:if> + + <xsl:if test="not($theFragment_2)"> + <xsl:message terminate="no"> + <xsl:text>Unable to locate id attribute: </xsl:text> + <xsl:value-of select="@xlink:href" /> + </xsl:message> + </xsl:if> + <xsl:apply-templates select="$theFragment_2 " + mode="dsdl:go" /> + </xsl:otherwise> + </xsl:choose> + + </xsl:otherwise> + </xsl:choose> + + <xsl:processing-instruction name="DSDL_INCLUDE_END"> + <xsl:value-of select="@xlink:href" /> + </xsl:processing-instruction> + </xslt:template> + + +</xslt:stylesheet>
\ No newline at end of file diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl new file mode 100644 index 00000000..d59b8f38 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl @@ -0,0 +1,55 @@ +<?xml version="1.0" ?><?xar XSLT?> +<!-- Implmentation for the Schematron XML Schema Language. + http://www.ascc.net/xml/resource/schematron/schematron.html + + Copyright (c) 2000,2001 Rick Jelliffe and Academia Sinica Computing Center, Taiwan + + This software is provided 'as-is', without any express or implied warranty. + In no event will the authors be held liable for any damages arising from + the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it freely, + subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a product, + an acknowledgment in the product documentation would be appreciated but is + not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. +--> + +<!-- Schematron message --> + +<xsl:stylesheet + version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias"> + +<xsl:import href="iso_schematron_skeleton_for_xslt1.xsl"/> + +<xsl:template name="process-prolog"> + <axsl:output method="text" /> +</xsl:template> + +<!-- use default rule for process-root: copy contens / ignore title --> +<!-- use default rule for process-pattern: ignore name and see --> +<!-- use default rule for process-name: output name --> +<!-- use default rule for process-assert and process-report: + call process-message --> + +<xsl:template name="process-message"> + <xsl:param name="pattern" /> + <xsl:param name="role" /> + <axsl:message> + <xsl:apply-templates mode="text" + /> (<xsl:value-of select="$pattern" /> + <xsl:if test="$role"> / <xsl:value-of select="$role" /> + </xsl:if>)</axsl:message> +</xsl:template> + +</xsl:stylesheet>
\ No newline at end of file diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl new file mode 100644 index 00000000..b0e7175c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl @@ -0,0 +1,1796 @@ +<?xml version="1.0"?><?xar XSLT?> + +<!-- + OVERVIEW + + ASCC/Schematron.com Skeleton Module for ISO Schematron (for XSLT1 systems) + + ISO Schematron is a language for making assertion about the presence or absence + of patterns in XML documents. It is typically used for as a schema language, or + to augment existing schema languages, and to check business rules. It is very + powerful, yet quite simple: a developer only need know XPath and about five other + elements. + + This is an open source implementation of ISO Schematron in XSLT. Although ISO does + not allow reference implementations which might compete with the text of the + standard, this code has been compiled by Rick Jelliffe, inventor of Schematron + and editor of the ISO standard; so developers can certainly use it as an + unofficial reference implementation for clarification. + + This implementation is based on one by Oliver Becker. API documentation is + available separately; try www.schematron.com for this. Funding for this + stylesheet over the years has come from Topologi Pty. Ltd., Geotempo Ltd., + and ASCC, Tapei. + + There are two versions of this skeleton: one is tailored for XSLT1 processors + and the other is tailored for XSLT2 processors. Future versions of the + XSLT2 skeleton may support more features than that the XSLT 1 skeleton. +--> +<!-- + TIPS + + A tip for new users of Schematron: make your assertions contain positive messages + about what is expected, rather than error messages. For example, use the form + "An X should have a Y, because Z". + + Another tip is that Schematron provides an + element <sch:ns> for declaring the namespaces and prefixes used in Xpaths in + attribute values; it does not extend the XML Namespaces mechanism: if a name + in an XPath has a prefix, there must be an <sch:ns> element for that prefix; if + a name in an XPath does not have a prefix, it is always in no namespace. + + A tip for implementers of Schematron, either using this API or re-implementing it: + make the value of the diagnostics, flags and richer features available if possible; + Schematron has many of the optional richer features which, if implemented, provide + a compelling alternative approach to validation and business-rules checking compared + to other schema languages and programs. + + If you create your own meta-stylesheet to override this one, it is a + good idea to have both in the same directory and to run the stylesheet + from that directory, as many XSLT implementations have ideosyncratic + handling of URLs: keep it simple. +--> + + +<!-- + INVOCATION INFORMATION + + The following parameters are available + + phase NMTOKEN | "#ALL" (default) Select the phase for validation + allow-foreign "true" | "false" (default) Pass non-Schematron elements to the generated stylesheet + sch.exslt.imports semi-colon delimited string of filenames for some EXSLT implementations + message-newline "true" (default) | "false" Generate an extra newline at the end of messages + optimize "visit-no-attributes" + debug "true" | "false" (default) Debug mode lets compilation continue despite problems + attributes "true" | "false" (Autodetecting) Use only when the schema has no attributes as the context nodes + only-child-elements "true" | "false" (Autodetecting) Use only when the schema has no comments + or PI as the context nodes + + The following parameters can be specified as Schematron variables in diagnostics, assertions and so on. + fileNameParameter string + fileDirParameter string + archiveNameParameter string In case of ZIP files + archiveDirParameter string In case of ZIP files + output-encoding Use when outputting to XML + + Experimental: USE AT YOUR OWN RISK + visit-text "true" "false" Also visist text nodes for context. WARNING: NON_STARDARD. + select-contents '' | 'key' | '//' Select different implementation strategies + + Conventions: Meta-stylesheets that override this may use the following parameters + generate-paths=true|false generate the @location attribute with XPaths + diagnose= yes | no Add the diagnostics to the assertion test in reports + terminate= yes | no Terminate on the first failed assertion or successful report +--> + +<!-- + XSLT VERSION SUPPORT + + XSLT 1: + A schema using the standard XSLT 1 query binding will have a /schema/@queryBinding='xslt' or + nothing. + + * Note: XT does not implement key() and will die if given it. + * Add all formal parameters to default templates + * Fix missing apply-templates from process-ns and add params back + + EXSLT: Experimental support + A schema using the EXSLT query binding will have a /schema/@queryBinding='exslt'. + It is built on XSLT 1. After experience is gained, this binding is expected to be + formalized as part of ISO Schematron, which currently reserves the "exslt" name for this purpose. + + Some EXSLT engines have the extra functions built-in. For these, there is no need to + provide library locations. For engines that require the functions, either hard code + them in this script or provide them on the command-line argument. + +--> +<!-- + PROCESS INFORMATION + + This stylesheet compiles a Schematron schema (*.sch) into XSLT code (*.xsl). + The generated XSLT code can then be run against an XML file (*.xml, etc) and + will produce validation results. + + The output of validation results is performed using named templates (process-*). + These can be overridden easily by making a new XSLT stylesheet that imports this + stylesheet but has its own version of the relevant process-* templates. Several + of these invoking stylesheets are available: "iso_svrl.xsl", for example generates + ISO Schematron Validation Report Language format results. + + In this version of the stylesheet, the ISO feature called "abstract patterns" is + implemented using macro processing: a prior XSLT stage to which converts uses + of abstract patterns into normal patterns. If you do not use abstract patterns, + it is not necessary to preprocess the schema. + + To summarize, a basic process flow for some commandline processor is like this: + XSLT -input=xxx.sch -output=xxx.xsl -stylesheet=iso_schematron_skeleton.xsl + XSLT -input=document.xml -output=xxx-document.results -stylesheet=xxx.xsl + + iso_svrl.xslt is an implementation of Schematron that can use this skeleton and + generate ISO SVRL reports. A process flow for some commandline processor would + be like this: + XSLT -input=xxx.sch -output=xxx.xsl -stylesheet=iso_svrl.xsl + XSLT -input=document.xml -output=xxx-document.results -stylesheet=xxx.xsl + + It is not impossible that ultimately a third stage, to handle macro-preprocessing + and inclusion, might be necessary. (The trade-off is in making this XSLT more + complex compared to making the outer process more complex.) + + This version has so far been tested with + Saxon 8 + MSXML 4 (or 6?) + + Please note that if you are using SAXON and JAXP, then you should use + System.setProperty("javax.xml.transform.TransformerFactory", + "net.sf.saxon.TransformerFactoryImpl"); + rather than + System.setProperty("javax.xml.xpath.TransformerFactory", + "net.sf.saxon.TransformerFactoryImpl"); + which is does not work, at least for the versions of SAXON we tried. +--> +<!-- + LEGAL INFORMATION + + Copyright (c) 2000-2008 Rick Jelliffe and Academia Sinica Computing Center, Taiwan + + This software is provided 'as-is', without any express or implied warranty. + In no event will the authors be held liable for any damages arising from + the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it freely, + subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a product, + an acknowledgment in the product documentation would be appreciated but is + not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. +--> +<!-- + NOTE: Compared to the iso_schematron_skeleton_for_saxon.xsl code, this version is currently missing + 1) localization + 2) properties + 3) pattern/@documents + + VERSION INFORMATION + 2009-02-25 RJ + * Fix up variable names so none are used twice in same template + * Tested on SAXON 9, Xalan 2.7.1. Partly tested MSXML. + 2008-09-19 RJ + * Add mode schematron-select-full-path and param full-path-notation + + 2008-08-11 + * TT report/@flag was missing + 2008-08-06 + * TT Top-level lets need to be implemented using xsl:param not xsl:variable + * TT xsl:param/@select must have XPath or not be specified + + Version: 2008-07-28 + * KH schematron-get-full-path-3 has [index] even on top step + * RJ fix schematron-get-full-path to have namespace predicate, I don't know why this was removed + + Version: 2008-07-24 + * RJ clean out commented out namespace handling code + * RJ add support for experimental non-standard attribute report/@action + and assert/@action, and add parameter not in the published API (should + not break anything, it is XSLT1) + * RJ Remove remaining XSLT2 code for ease of reading + + Version: 2008-07-14 minor update for inclusion experiments + * RJ Clean up zero-length fragment test on include + * RJ Add experimental support for include containers + * RJ For path generation, test for //iso:schema not just /iso:schema, for potential embedded Schematron support + * RJ Don't generate double error messages for old namespace elements + * RJ Experimental iso:rule/iso:title just kept as comment (bigger request Uche Ogbuji) + * RJ Remove spurious debug messages + * RJ Fix bug that prevented including patterns in this (report Roger + Costello) + + Version: 2007-10-17 + From this version on I am forking XSLT2 support to a different version of the script. + This is due to the increasingly horrible state of the namespace handling code as well + as other inconsistencies between the major implementations of different versions. + The intent is that future versions of this will have XSLT2 isms removed and be simplified + to cope with only XSLT1 and EXLST. Note that though this version is called + iso_schematron_skeleton_for_xslt1, the various meta-stylesheets will continue to just call + iso_schematron_skeleton: it is up to you to rename the stylesheet to the one you want to + use. + + * RJ fix FULL-PATH problem with attribute names + + + Version: 2007-07-19 + Accept most changes in David Carlisle's fork, but continue as XSLT1 script: + http://dpcarlisle.blogspot.com/search/label/schematron + * DPC Remove "optimize" parameter + * DPC Add autodetecting optimize parameter attribute to skip checking attribute + context + * DPC Add autodetecting optimize parameter only-child-elements turn off checking for + comments and PIs + * DPC (Experimental: NON_STANDARD DANGER!) Add param visit-text to viist text + nodes too for context + * DPC Fix inclusion syntax to allow # + * DPC Priorities count up from 1000 not down from 4000 to allow more rules + * RJ Add new template for titles of schemas, with existing behaviour. + Override process-schema-title for custom processing of title + + + Version: 2007-04-04 + * RJ debug mode param + * RJ alter mixed test to only test mixed branches, so the same document + could have old and new namespaces schemas in it, but each schema must + be distinct, just so as not to overconstrain things. + * KH zero-length include/@href is fatal error, but allow debug mode + * SB add hint on SAXON and JAXP + * DC generate-full-path-1 generates XLST1 code by default + Version: 2007-03-05 + * AS Typo for EXSLT randome, improve comment + * KH get-schematron-full-path-2 needs to apply to attributes too + * DP document policy on extensions better + * DC use copy-of not copy for foreign elements + * DC add generate-path-2 + * DC don't try to apply templates to attribute axis on attribute nodes, to + stop SAXON warning. + * RJ improve reporting of typos + + Version: 2007-02-08 + * KH Schematron fullpath implementation: @* handled twice and / missing + * KH Change stylesheetbody from named template to mode to allow implementers more flexibility. + Move process-ns to outside the stylesheet body. + * DP, FG, fix handling of xslt:key + * FG no iso:title/@class + * Experimental optimization 'visit-no-attributes' + * KH Experimental added schematron-get-full-path-2 which gives prefixed version for humans + * DC Move stylesheet/@version generation to after namespace handling + * DC, FG EXSLT namespace handling code + * FG add ref and commented code from FG's page on namespaces + * Start adding normalize-space() to parameter code + * Add a space between diagnostics + + Version: 2007-01-22 + * DP change = ($start) to = $start and =($phase) to =$phase + to run under Saxon 8.8j + * FG better title section using ( @id | sch:title)[last()] + * Default query language binding is "xslt" not "xslt1" + + Version: 2007-01-19 + * Simplify message newline code + * Remove termination and xpath appending to message options: + factor out as iso_schematron_terminator.xsl + * Comment out XSLT2 namespace fix temporarily + + Version: 2007-01-18 (First beta candidate for comment) + * DC remove xml:space="preserve" + * FG improve comment on import statement + * DC improve comments on invocation section + * Add exploratory support for sch:schema[@queryBinding='xpath'] + by allowing it and warning as lets are found + * Be strict about queryBinding spelling errors + * Extra comments on the different queryBindings + * KH Add option "message-paths" to generate XPath from output + * KH Add option "terminate" to halt with an error after the first assertion + * KH refactor paths in schematron-full-path + * Improve (?) namespace handling: no dummy attributes for prefix "xsl" generated + + Version: 2007-01-15 + * FG fix for calling templates + * Add formal parameters to default templates: may help XSLT 2 + * Fix get-schematron-full-path + * Include skeleton1-6 is commented out by default + + Version:2007-01-12 (Pre-beta release to Schematron-love-in maillist) + * Add many extra parameters to the process-* calls, so that almost + all the information in the schema can be provided to client programs. + Also, rearrange the parameters to fit in with the ISO schema, which + has "rich" and "linkable" attribute groups. + * Warn on diagnostics with no ID once only + * Improved path reporting, to handle for namespaces + * Add process-title dummy template for API + * Add command-line parameter allow-foreign (true|false) to suppress + warnings one foreign elements and pass them through to the generated + stylesheet + * remove legacy templates for the old ASCC namespace and no namespace, + and use an import statement instead. Much cleaner now! + * patterns use @id not @name + * titles can contain sub-elements + * start change sch:rule to allow attributes, PIs and comments + * the default process-* for inline elements add a leading and trailing + space, to reduce the chance of concatenation. + * add comments to make the generated code clearer + + Version:2006-11-07 (ISO: first release private to schematron-love-in maillist for review) + * Duplicate pattern templates, for handling ISO namespace + * Add priority onto default and paragraph templates + * Add namespace checks + * Handle key in xsl namespace not iso + * Add include + * Improve namespace handling + * Preliminary XSLT2 and EXSLT support + * Refactor iso:schema for clarity + + Version: 2003-05-26 + * Fix bug with key + Version: 2003-04-16 + * handle 1.6 let expressions + * make key use XSLT names, and allow anywhere + Version: 2001-06-13 + * same skeleton now supports namespace or no namespace + * parameters to handlers updated for all 1.5 attributes + * diagnostic hints supported: command-line option diagnose=yes|no + * phases supported: command-line option phase=#ALL|... + * abstract rules + * compile-time error messages + * add utility routine generate-id-from-path + + Contributors: Rick Jelliffe (original), Oliver Becker (architecture, XSLT2), + Miloslav Nic (diagnostic, phase, options), Ludwig Svenonius (abstract) + Uche Ogbuji (misc. bug fixes), Jim Ancona (SAXON workaround), + Francis Norton (generate-id-from-path), Robert Leftwich, Bryan Rasmussen, + Dave Pawson (include, fallback), Florent Georges (namespaces, exslt, attribute + context), Benoit Maisonny (attribute context), John Dumps (process-message newline), + Cliff Stanford (diagnostics and other newlines) + + + KNOWN TYPICAL LIMITATIONS: + * Don't use <sch:ns prefix="xsl" .../> with a namespace other than the standard + XSLT one. This would be a bizarre thing to do anyway. + * Don't use other prefixes for the XSLT namespace either; some implementations will + not handle it correctly. + + EXTENSIONS: + ISO Schematron is designed as a framework with some standard query language + bindings. If you need to support other features, please do so safely by making + up your own @queryLanguage name: this makes it clear that your schema requires + special features. For example, default ISO Schematron does not support user + defined functions; so if you want to use the user defined function feature + in XSLT, you need to have a schema with some queryBinding attribute name like + "XSLT-with-my-functions" or whatever. +--> + + + + +<xsl:stylesheet version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias" + xmlns:sch="http://www.ascc.net/xml/schematron" + xmlns:iso="http://purl.oclc.org/dsdl/schematron" + xmlns:exsl="http://exslt.org/common" + xmlns:msxsl="urn:schemas-microsoft-com:xslt" + extension-element-prefixes="exsl msxsl" + > +<!-- This program implements ISO Schematron, except for abstract patterns which require a preprocess. --> + + +<xsl:namespace-alias stylesheet-prefix="axsl" result-prefix="xsl"/> + + +<!-- Category: top-level-element --> +<xsl:output method="xml" omit-xml-declaration="no" standalone="yes" indent="yes"/> + + +<xsl:param name="phase"> + <xsl:choose> + <xsl:when test="//sch:schema/@defaultPhase"> + <xsl:value-of select="//sch:schema/@defaultPhase"/> + </xsl:when> + <xsl:when test="//iso:schema/@defaultPhase"> + <xsl:value-of select="//iso:schema/@defaultPhase"/> + </xsl:when> + <xsl:otherwise>#ALL</xsl:otherwise> + </xsl:choose> +</xsl:param> + +<xsl:param name="allow-foreign">false</xsl:param> + +<xsl:param name="message-newline">true</xsl:param> + +<!-- DPC set to true if contexts should be checked on attribute nodes + defaults to true if there is any possibility that a context could match an attribute, + err on the side if caution, a context of *[.='@'] would cause this param to defualt to true + even though @ is in a string +--> +<xsl:param name="attributes"> + <xsl:choose> + <xsl:when test="//iso:rule[contains(@context,'@') or contains(@context,'attribute')]">true</xsl:when> + <xsl:otherwise>false</xsl:otherwise> + </xsl:choose> +</xsl:param> + +<!-- DPC set to true if contexts should be checked on just elements in the child axis + defaults to true if there is any possibility that a context could match an comment or PI + err on the side if caution, a context of *[.='('] would cause this param to defualt to true + even though ( is in a string, but node() comment() and processing-instruction() all have a ( +--> +<xsl:param name="only-child-elements"> + <xsl:choose> + <xsl:when test="//iso:rule[contains(@context,'(')]">true</xsl:when> + <xsl:otherwise>false</xsl:otherwise> + </xsl:choose> +</xsl:param> + +<!-- DPC set to true if contexts should be checked on text nodes nodes (if only-child-elements is false) + THIS IS NON CONFORMANT BEHAVIOUR JUST FOR DISCUSSION OF A POSSIBLE CHANGE TO THE + SPECIFICATION. THIS PARAM SHOULD GO IF THE FINAL DECISION IS THAT THE SPEC DOES NOT CHANGE. + Always defaults to false +--> +<xsl:param name="visit-text" select="'false'"/> + +<!-- DPC + When selecting contexts the specified behaviour is + @*|node()[not(self::text())] + The automatic settings may use + node()[not(self::text())] + @*|* + * + instead for schema for which they are equivalent. + If the params are set explictly the above may be used, and also either if + @* + @*|node() + in all cases the result may not be equivalent, for example if you specify no attributes and the schema + does have attribute contexts they will be silently ignored. + + after testing it turns out that + node()[not(self::text())] is slower in saxon than *|comment()|processing-instruction() + which I find a bit surprising but anyway I'll use the longr faster version. +--> +<xsl:variable name="context-xpath"> + <xsl:if test="$attributes='true'">@*|</xsl:if> + <xsl:choose> + <xsl:when test="$only-child-elements='true'">*</xsl:when> + <xsl:when test="$visit-text='true'">node()</xsl:when> + <xsl:otherwise>*|comment()|processing-instruction()</xsl:otherwise> + </xsl:choose> +</xsl:variable> + +<!-- DPC if this is set to + '' use recursive templates to iterate over document tree, + 'key' select all contexts with a key rather than walking the tree explictly in each mode + '//' select all contexts with // a key rather than walking the tree explictly in each mode (XSLT2 only) +--> +<xsl:param name="select-contexts" select="''"/> + + +<xsl:param name="output-encoding"/> +<!-- e.g. saxon file.xml file.xsl "sch.exslt.imports=.../string.xsl;.../math.xsl" --> +<xsl:param name="sch.exslt.imports"/> + +<!-- Set the language code for messages --> +<xsl:param name="langCode">default</xsl:param> + +<xsl:param name="debug">false</xsl:param> + + +<!-- Set the default for schematron-select-full-path, i.e. the notation for svrl's @location--> +<xsl:param name="full-path-notation">1</xsl:param> + +<!-- Simple namespace check --> +<xsl:template match="/"> + <xsl:if test="//sch:*[ancestor::iso:* or descendant::iso:*]"> + <xsl:message>Schema error: Schematron elements in old and new namespaces found</xsl:message> + <xsl:if test=" $debug = 'false' " /> + </xsl:if> + + <xsl:apply-templates /> +</xsl:template> + + +<!-- ============================================================== --> +<!-- ISO SCHEMATRON SCHEMA ELEMENT --> +<!-- Not handled: Abstract patterns. A pre-processor is assumed. --> +<!-- ============================================================== --> + +<!-- SCHEMA --> +<!-- Default uses XSLT 1 --> +<xsl:template match="iso:schema[not(@queryBinding) or @queryBinding='xslt' + or @queryBinding='xslt1' or @queryBinding='XSLT' or @queryBinding='XSLT1' + or @queryBinding='xpath']"> + <xsl:if test=" + @queryBinding='xslt1' or @queryBinding='XSLT' or @queryBinding='XSLT1'"> + <xsl:message>Schema error: in the queryBinding attribute, use 'xslt'</xsl:message> + </xsl:if> + <axsl:stylesheet> + <xsl:apply-templates select="iso:ns"/> + <!-- Handle the namespaces before the version attribute: reported to help SAXON --> + <xsl:attribute name="version">1.0</xsl:attribute> + + <xsl:apply-templates select="." mode="stylesheetbody"/> + <!-- was xsl:call-template name="stylesheetbody"/ --> + </axsl:stylesheet> +</xsl:template> + +<!-- Using EXSLT with all modeles (except function module: not applicable) --> +<xsl:template match="iso:schema[@queryBinding='exslt']" priority="10"> + <xsl:comment>This XSLT was automatically generated from a Schematron schema.</xsl:comment> + <axsl:stylesheet + xmlns:date="http://exslt.org/dates-and-times" + xmlns:dyn="http://exslt.org/dynamic" + xmlns:exsl="http://exslt.org/common" + xmlns:math="http://exslt.org/math" + xmlns:random="http://exslt.org/random" + xmlns:regexp="http://exslt.org/regular-expressions" + xmlns:set="http://exslt.org/sets" + xmlns:str="http://exslt.org/strings" + extension-element-prefixes="date dyn exsl math random regexp set str" > + + <xsl:apply-templates select="iso:ns"/> + <!-- Handle the namespaces before the version attribute: reported to help SAXON --> + <xsl:attribute name="version">1.0</xsl:attribute> + + <xsl:apply-templates select="." mode="stylesheetbody"/> + <!-- was xsl:call-template name="stylesheetbody"/ --> + </axsl:stylesheet> +</xsl:template> + + +<!-- Default uses XSLT 1 --> +<xsl:template match="iso:schema" priority="-1"> + <xsl:message terminate="yes" >Fail: This implementation of ISO Schematron does not work with + schemas using the "<xsl:value-of select="@queryBinding"/>" query language.</xsl:message> +</xsl:template> + +<xsl:template match="*" mode="stylesheetbody"> + <!--xsl:template name="stylesheetbody"--> + <xsl:comment>Implementers: please note that overriding process-prolog or process-root is + the preferred method for meta-stylesheets to use where possible. </xsl:comment><xsl:text> </xsl:text> + + <!-- These parameters may contain strings with the name and directory of the file being + validated. For convenience, if the caller only has the information in a single string, + that string could be put in fileDirParameter. The archives parameters are available + for ZIP archives. + --> + + <axsl:param name="archiveDirParameter" /> + <axsl:param name="archiveNameParameter" /> + <axsl:param name="fileNameParameter" /> + <axsl:param name="fileDirParameter" /> + + <xsl:call-template name="iso:exslt.add.imports" /> + <xsl:text> </xsl:text><xsl:comment>PHASES</xsl:comment><xsl:text> </xsl:text> + <xsl:call-template name="handle-phase"/> + <xsl:text> </xsl:text><xsl:comment>PROLOG</xsl:comment><xsl:text> </xsl:text> + <xsl:call-template name="process-prolog"/> + <xsl:text> </xsl:text><xsl:comment>KEYS</xsl:comment><xsl:text> </xsl:text> + <xsl:apply-templates mode="do-keys" select="xsl:key "/> + <xsl:text> </xsl:text><xsl:comment>DEFAULT RULES</xsl:comment><xsl:text> </xsl:text> + <xsl:call-template name="generate-default-rules" /> + <xsl:text> </xsl:text><xsl:comment>SCHEMA METADATA</xsl:comment><xsl:text> </xsl:text> + <xsl:call-template name="handle-root"/> + <xsl:text> </xsl:text><xsl:comment>SCHEMATRON PATTERNS</xsl:comment><xsl:text> </xsl:text> + + <xsl:apply-templates select="*[not(self::iso:ns)] " /> +</xsl:template> + + <xsl:template name="iso:exslt.add.imports"> + <xsl:param name="imports" select="$sch.exslt.imports"/> + <xsl:choose> + <xsl:when test="contains($imports, ';')"> + <axsl:import href="{ substring-before($imports, ';') }"/> + <xsl:call-template name="iso:exslt.add.imports"> + <xsl:with-param name="imports" select="substring-after($imports, ';')"/> + </xsl:call-template> + </xsl:when> + <xsl:when test="$imports"> + <axsl:import href="{ $imports }"/> + </xsl:when> + </xsl:choose> + </xsl:template> + +<xsl:template name="handle-phase" > + <xsl:if test="not(normalize-space( $phase ) = '#ALL')"> + <xsl:if test="not(iso:phase[@id = normalize-space( $phase )])"> + <xsl:message>Phase Error: no phase with name <xsl:value-of select="normalize-space( $phase )" + /> has been defined.</xsl:message> + </xsl:if> + </xsl:if> +</xsl:template> + +<xsl:template name="generate-default-rules"> + <xsl:text> </xsl:text> + <xsl:comment>MODE: SCHEMATRON-SELECT-FULL-PATH</xsl:comment><xsl:text> </xsl:text> + <xsl:comment>This mode can be used to generate an ugly though full XPath for locators</xsl:comment><xsl:text> </xsl:text> + <axsl:template match="*" mode="schematron-select-full-path"> + <xsl:choose> + <xsl:when test=" $full-path-notation = '1' "> + <!-- Use for computers, but rather unreadable for humans --> + <axsl:apply-templates select="." mode="schematron-get-full-path"/> + </xsl:when> + <xsl:when test=" $full-path-notation = '2' "> + <!-- Use for humans, but no good for paths unless namespaces are known out-of-band --> + <axsl:apply-templates select="." mode="schematron-get-full-path-2"/> + </xsl:when> + <xsl:when test=" $full-path-notation = '3' "> + <!-- Obsolescent. Use for humans, but no good for paths unless namespaces are known out-of-band --> + <axsl:apply-templates select="." mode="schematron-get-full-path-3"/> + </xsl:when> + + <xsl:otherwise > + <!-- Use for computers, but rather unreadable for humans --> + <axsl:apply-templates select="." mode="schematron-get-full-path"/> + </xsl:otherwise> + </xsl:choose> + </axsl:template> + + + <xsl:text> </xsl:text> + <xsl:comment>MODE: SCHEMATRON-FULL-PATH</xsl:comment><xsl:text> </xsl:text> + <xsl:comment>This mode can be used to generate an ugly though full XPath for locators</xsl:comment><xsl:text> </xsl:text> + <axsl:template match="*" mode="schematron-get-full-path"> + <axsl:apply-templates select="parent::*" mode="schematron-get-full-path"/> + + <!-- XSLT1 syntax --> + + <axsl:text>/</axsl:text> + <axsl:choose> + <axsl:when test="namespace-uri()=''"> + <axsl:value-of select="name()"/> + <axsl:variable name="p_1" select="1+ + count(preceding-sibling::*[name()=name(current())])" /> + <axsl:if test="$p_1>1 or following-sibling::*[name()=name(current())]"> + <xsl:text/>[<axsl:value-of select="$p_1"/>]<xsl:text/> + </axsl:if> + </axsl:when> + <axsl:otherwise> + <axsl:text>*[local-name()='</axsl:text> + <axsl:value-of select="local-name()"/><axsl:text>' and namespace-uri()='</axsl:text> + <axsl:value-of select="namespace-uri()"/> + <axsl:text>']</axsl:text> + <axsl:variable name="p_2" select="1+ + count(preceding-sibling::*[local-name()=local-name(current())])" /> + <axsl:if test="$p_2>1 or following-sibling::*[local-name()=local-name(current())]"> + <xsl:text/>[<axsl:value-of select="$p_2"/>]<xsl:text/> + </axsl:if> + </axsl:otherwise> + </axsl:choose> + </axsl:template> + + + <axsl:template match="@*" mode="schematron-get-full-path"> + + <!-- XSLT1 syntax --> + <axsl:text>/</axsl:text> + <axsl:choose> + <axsl:when test="namespace-uri()=''">@<axsl:value-of + select="name()"/></axsl:when> + <axsl:otherwise> + <axsl:text>@*[local-name()='</axsl:text> + <axsl:value-of select="local-name()"/> + <axsl:text>' and namespace-uri()='</axsl:text> + <axsl:value-of select="namespace-uri()"/> + <axsl:text>']</axsl:text> + </axsl:otherwise> + </axsl:choose> + + </axsl:template> + + + <xsl:text> </xsl:text> + + <xsl:comment>MODE: SCHEMATRON-FULL-PATH-2</xsl:comment> + <xsl:text> </xsl:text> + <xsl:comment>This mode can be used to generate prefixed XPath for humans</xsl:comment> + <xsl:text> </xsl:text> + <!--simplify the error messages by using the namespace prefixes of the + instance rather than the generic namespace-uri-styled qualification--> + <axsl:template match="node() | @*" mode="schematron-get-full-path-2"> + <!--report the element hierarchy--> + <axsl:for-each select="ancestor-or-self::*"> + <axsl:text>/</axsl:text> + <axsl:value-of select="name(.)"/> + <axsl:if test="preceding-sibling::*[name(.)=name(current())]"> + <axsl:text>[</axsl:text> + <axsl:value-of + select="count(preceding-sibling::*[name(.)=name(current())])+1"/> + <axsl:text>]</axsl:text> + </axsl:if> + </axsl:for-each> + <!--report the attribute--> + <axsl:if test="not(self::*)"> + <axsl:text/>/@<axsl:value-of select="name(.)"/> + </axsl:if> + </axsl:template> + + <xsl:text> </xsl:text> + <xsl:comment>MODE: GENERATE-ID-FROM-PATH </xsl:comment><xsl:text> </xsl:text> + <!-- repeatable-id maker derived from Francis Norton's. --> + <!-- use this if you need generate ids in separate passes, + because generate-id() is not guaranteed to produce the same + results each time. These ids are not XML names but closer to paths. --> + <axsl:template match="/" mode="generate-id-from-path"/> + <axsl:template match="text()" mode="generate-id-from-path"> + <axsl:apply-templates select="parent::*" mode="generate-id-from-path"/> + <axsl:value-of select="concat('.text-', 1+count(preceding-sibling::text()), '-')"/> + </axsl:template> + <axsl:template match="comment()" mode="generate-id-from-path"> + <axsl:apply-templates select="parent::*" mode="generate-id-from-path"/> + <axsl:value-of select="concat('.comment-', 1+count(preceding-sibling::comment()), '-')"/> + </axsl:template> + <axsl:template match="processing-instruction()" mode="generate-id-from-path"> + <axsl:apply-templates select="parent::*" mode="generate-id-from-path"/> + <axsl:value-of + select="concat('.processing-instruction-', 1+count(preceding-sibling::processing-instruction()), '-')"/> + </axsl:template> + <axsl:template match="@*" mode="generate-id-from-path"> + <axsl:apply-templates select="parent::*" mode="generate-id-from-path"/> + <axsl:value-of select="concat('.@', name())"/> + </axsl:template> + <axsl:template match="*" mode="generate-id-from-path" priority="-0.5"> + <axsl:apply-templates select="parent::*" mode="generate-id-from-path"/> + <axsl:text>.</axsl:text> +<!-- + <axsl:choose> + <axsl:when test="count(. | ../namespace::*) = count(../namespace::*)"> + <axsl:value-of select="concat('.namespace::-',1+count(namespace::*),'-')"/> + </axsl:when> + <axsl:otherwise> +--> + <axsl:value-of + select="concat('.',name(),'-',1+count(preceding-sibling::*[name()=name(current())]),'-')"/> +<!-- + </axsl:otherwise> + </axsl:choose> +--> + </axsl:template> + + + <xsl:comment>MODE: SCHEMATRON-FULL-PATH-3</xsl:comment> + + <xsl:text> </xsl:text> + <xsl:comment>This mode can be used to generate prefixed XPath for humans + (Top-level element has index)</xsl:comment> + <xsl:text> </xsl:text> + <!--simplify the error messages by using the namespace prefixes of the + instance rather than the generic namespace-uri-styled qualification--> + <axsl:template match="node() | @*" mode="schematron-get-full-path-3"> + <!--report the element hierarchy--> + <axsl:for-each select="ancestor-or-self::*"> + <axsl:text>/</axsl:text> + <axsl:value-of select="name(.)"/> + <axsl:if test="parent::*"> + <axsl:text>[</axsl:text> + <axsl:value-of + select="count(preceding-sibling::*[name(.)=name(current())])+1"/> + <axsl:text>]</axsl:text> + </axsl:if> + </axsl:for-each> + <!--report the attribute--> + <axsl:if test="not(self::*)"> + <axsl:text/>/@<axsl:value-of select="name(.)"/> + </axsl:if> + </axsl:template> + + <xsl:text> </xsl:text> + <xsl:comment>MODE: GENERATE-ID-2 </xsl:comment><xsl:text> </xsl:text> + <!-- repeatable-id maker from David Carlisle. --> + <!-- use this if you need generate IDs in separate passes, + because generate-id() is not guaranteed to produce the same + results each time. These IDs are well-formed XML NMTOKENS --> + <axsl:template match="/" mode="generate-id-2">U</axsl:template> + + <axsl:template match="*" mode="generate-id-2" priority="2"> + <axsl:text>U</axsl:text> + <axsl:number level="multiple" count="*"/> + </axsl:template> + + <axsl:template match="node()" mode="generate-id-2"> + <axsl:text>U.</axsl:text> + <axsl:number level="multiple" count="*"/> + <axsl:text>n</axsl:text> + <axsl:number count="node()"/> + </axsl:template> + + <axsl:template match="@*" mode="generate-id-2"> + <axsl:text>U.</axsl:text> + <axsl:number level="multiple" count="*"/> + <axsl:text>_</axsl:text> + <axsl:value-of select="string-length(local-name(.))"/> + <axsl:text>_</axsl:text> + <axsl:value-of select="translate(name(),':','.')"/> + </axsl:template> + + + <xsl:comment>Strip characters</xsl:comment> + <axsl:template match="text()" priority="-1" /> + + </xsl:template> + + <xsl:template name="handle-root"> + <!-- Process the top-level element --> + <axsl:template match="/"> + <xsl:call-template name="process-root"> + <xsl:with-param + name="title" select="(@id | iso:title)[last()]"/> + <xsl:with-param name="version" select="'iso'" /> + <xsl:with-param name="schemaVersion" select="@schemaVersion" /> + <xsl:with-param name="queryBinding" select="@queryBinding" /> + <xsl:with-param name="contents"> + <xsl:apply-templates mode="do-all-patterns"/> + </xsl:with-param> + + <!-- "Rich" properties --> + <xsl:with-param name="fpi" select="@fpi"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + <xsl:with-param name="see" select="@see" /> + <xsl:with-param name="space" select="@xml:space" /> + + + <!-- Non-standard extensions not part of the API yet --> + <xsl:with-param name="action" select="@action" /> + </xsl:call-template> + </axsl:template> + + +</xsl:template> + +<!-- ============================================================== --> +<!-- ISO SCHEMATRON ELEMENTS --> +<!-- ============================================================== --> + + <!-- ISO ACTIVE --> + <xsl:template match="iso:active"> + <xsl:if test="not(@pattern)"> + <xsl:message>Markup Error: no pattern attribute in <active></xsl:message> + </xsl:if> + + <xsl:if test="not(../../iso:pattern[@id = current()/@pattern]) + and not(../../iso:include)"> + <xsl:message>Reference Error: the pattern "<xsl:value-of select="@pattern" + />" has been activated but is not declared</xsl:message> + </xsl:if> + </xsl:template> + + <!-- ISO ASSERT and REPORT --> + <xsl:template match="iso:assert"> + + <xsl:if test="not(@test)"> + <xsl:message>Markup Error: no test attribute in <assert</xsl:message> + </xsl:if> + <xsl:text> </xsl:text> + <xsl:comment>ASSERT <xsl:value-of select="@role" /> </xsl:comment><xsl:text> </xsl:text> + + <axsl:choose> + <axsl:when test="{@test}"/> + <axsl:otherwise> + <xsl:call-template name="process-assert"> + <xsl:with-param name="test" select="normalize-space(@test)" /> + <xsl:with-param name="diagnostics" select="@diagnostics"/> + <xsl:with-param name="flag" select="@flag"/> + + <!-- "Rich" properties --> + <xsl:with-param name="fpi" select="@fpi"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + <xsl:with-param name="see" select="@see" /> + <xsl:with-param name="space" select="@xml:space" /> + + <!-- "Linking" properties --> + <xsl:with-param name="role" select="@role" /> + <xsl:with-param name="subject" select="@subject" /> + </xsl:call-template> + + </axsl:otherwise> + </axsl:choose> + </xsl:template> + <xsl:template match="iso:report"> + + <xsl:if test="not(@test)"> + <xsl:message>Markup Error: no test attribute in <report></xsl:message> + </xsl:if> + + <xsl:text> </xsl:text> + <xsl:comment>REPORT <xsl:value-of select="@role" /> </xsl:comment><xsl:text> </xsl:text> + + <axsl:if test="{@test}"> + + <xsl:call-template name="process-report"> + <xsl:with-param name="test" select="normalize-space(@test)" /> + <xsl:with-param name="diagnostics" select="@diagnostics"/> + <xsl:with-param name="flag" select="@flag"/> + + <!-- "Rich" properties --> + <xsl:with-param name="fpi" select="@fpi"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + <xsl:with-param name="see" select="@see" /> + <xsl:with-param name="space" select="@xml:space" /> + + <!-- "Linking" properties --> + <xsl:with-param name="role" select="@role" /> + <xsl:with-param name="subject" select="@subject" /> + </xsl:call-template> + + </axsl:if> + </xsl:template> + + + <!-- ISO DIAGNOSTIC --> + <!-- We use a mode here to maintain backwards compatability, instead of adding it + to the other mode. + --> + <xsl:template match="iso:diagnostic" mode="check-diagnostics"> + <xsl:if test="not(@id)"> + <xsl:message>Markup Error: no id attribute in <diagnostic></xsl:message> + </xsl:if> + </xsl:template> + + <xsl:template match="iso:diagnostic" > + <xsl:call-template name="process-diagnostic"> + + <!-- "Rich" properties --> + <xsl:with-param name="fpi" select="@fpi"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + <xsl:with-param name="see" select="@see" /> + <xsl:with-param name="space" select="@xml:space" /> + </xsl:call-template> + </xsl:template> + + <!-- ISO DIAGNOSTICS --> + <xsl:template match="iso:diagnostics" > + <xsl:apply-templates mode="check-diagnostics" select="*" /> + </xsl:template> + + <!-- ISO DIR --> + <xsl:template match="iso:dir" mode="text" > + <xsl:call-template name="process-dir"> + <xsl:with-param name="value" select="@value"/> + </xsl:call-template> + </xsl:template> + + <!-- ISO EMPH --> + <xsl:template match="iso:emph" mode="text"> + + <xsl:call-template name="process-emph"/> + + </xsl:template> + + <!-- ISO EXTENDS --> + <xsl:template match="iso:extends"> + <xsl:if test="not(@rule)"> + <xsl:message>Markup Error: no rule attribute in <extends></xsl:message> + </xsl:if> + <xsl:if test="not(//iso:rule[@abstract='true'][@id= current()/@rule] )"> + <xsl:message>Reference Error: the abstract rule "<xsl:value-of select="@rule" + />" has been referenced but is not declared</xsl:message> + </xsl:if> + <xsl:call-template name="IamEmpty" /> + + <xsl:if test="//iso:rule[@id=current()/@rule]"> + <xsl:apply-templates select="//iso:rule[@id=current()/@rule]" + mode="extends"/> + </xsl:if> + + </xsl:template> + + <!-- KEY: ISO has no KEY --> + <!-- NOTE: + Key has had a checkered history. Schematron 1.0 allowed it in certain places, but + users came up with a different location, which has now been adopted. + + XT, the early XSLT processor, did not implement key and died when it was present. + So there are some versions of the Schematron skeleton for XT that strip out all + key elements. + + Xalan (e.g. Xalan4C 1.0 and a Xalan4J) also had a funny. A fix involved making + a top-level parameter called $hiddenKey and then using that instead of matching + "key". This has been removed. + --> + <xsl:template match="xsl:key" mode="do-keys" > + <xsl:if test="not(@name)"> + <xsl:message>Markup Error: no name attribute in <key></xsl:message> + </xsl:if> + <xsl:if test="not(@path) and not(@use)"> + <xsl:message>Markup Error: no path or use attribute in <key></xsl:message> + </xsl:if> + <xsl:choose> + <xsl:when test="parent::iso:rule "> + <xsl:call-template name="IamEmpty" /> + <xsl:choose> + <xsl:when test="@path"> + <axsl:key match="{../@context}" name="{@name}" use="{@path}"/> + </xsl:when> + <xsl:otherwise> + <axsl:key match="{../@context}" name="{@name}" use="{@use}"/> + </xsl:otherwise> + </xsl:choose> + </xsl:when> + <xsl:otherwise> + <xsl:if test="not(@match) "> + <xsl:message>Markup Error: no path or use attribute in <key></xsl:message> + </xsl:if> + <axsl:key> + <xsl:copy-of select="@*"/> + </axsl:key> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template match="xsl:key " /><!-- swallow --> + + <xsl:template match="iso:key " > + <xsl:message>Schema error: The key element is not in the ISO Schematron namespace. Use the XSLT namespace.</xsl:message> + </xsl:template> + + <!-- ISO INCLUDE --> + <!-- This is only a fallback. Include really needs to have been done before this as a separate pass.--> + + <xsl:template match="iso:include[not(normalize-space(@href))]" + priority="1"> + <xsl:if test=" $debug = 'false' "> + <xsl:message terminate="yes">Schema error: Empty href= attribute for include directive.</xsl:message> + </xsl:if> + + </xsl:template> + + <!-- Extend the URI syntax to allow # refererences --> + <!-- Add experimental support for simple containers like /xxx:xxx/iso:pattern to allow better includes --> + <xsl:template match="iso:include"> + <xsl:variable name="document-uri" select="substring-before(concat(@href,'#'), '#')"/> + <xsl:variable name="fragment-id" select="substring-after(@href, '#')"/> + + <xsl:choose> + + <xsl:when test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0" > + <xsl:message>Error: Impossible URL in Schematron include</xsl:message> + </xsl:when> + + <xsl:when test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_1" select="$theDocument_1//iso:*[@id= $fragment-id ]" /> + <xsl:if test=" $theFragment_1/self::iso:schema "> + <xsl:message>Schema error: Use include to include fragments, not a whole schema</xsl:message> + </xsl:if> + <xsl:apply-templates select=" $theFragment_1"/> + </xsl:when> + + <xsl:otherwise> + <xsl:variable name="theDocument_2" select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" select="$theDocument_2/iso:*" /> + <xsl:variable name="theContainedFragments" select="$theDocument_2/*/iso:*" /> + <xsl:if test=" $theFragment_2/self::iso:schema or $theContainedFragments/self::iso:schema"> + <xsl:message>Schema error: Use include to include fragments, not a whole schema</xsl:message> + </xsl:if> + <xsl:apply-templates select="$theFragment_2 | $theContainedFragments "/> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <!-- This is to handle the particular case of including patterns --> + <xsl:template match="iso:include" mode="do-all-patterns"> + <xsl:variable name="document-uri" select="substring-before(concat(@href,'#'), '#')"/> + <xsl:variable name="fragment-id" select="substring-after(@href, '#')"/> + + <xsl:choose> + + <xsl:when test="string-length( $document-uri ) = 0 and string-length( $fragment-id ) = 0" > + <xsl:message>Error: Impossible URL in Schematron include</xsl:message> + </xsl:when> + + <xsl:when test="string-length( $fragment-id ) > 0"> + <xsl:variable name="theDocument_1" select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_1" select="$theDocument_1//iso:*[@id= $fragment-id ]" /> + <xsl:if test=" $theFragment_1/self::iso:schema "> + <xsl:message>Schema error: Use include to include fragments, not a whole schema</xsl:message> + </xsl:if> + <xsl:apply-templates select=" $theFragment_1" mode="do-all-patterns"/> + </xsl:when> + + <xsl:otherwise> + <!-- Import the top-level element if it is in schematron namespace, + or its children otherwise, to allow a simple containment mechanism. --> + <xsl:variable name="theDocument_2" select="document( $document-uri,/ )" /> + <xsl:variable name="theFragment_2" select="$theDocument_2/iso:*" /> + <xsl:variable name="theContainedFragments" select="$theDocument_2/*/iso:*" /> + <xsl:if test=" $theFragment_2/self::iso:schema or $theContainedFragments/self::iso:schema"> + <xsl:message>Schema error: Use include to include fragments, not a whole schema</xsl:message> + </xsl:if> + <xsl:apply-templates select="$theFragment_2 | $theContainedFragments " + mode="do-all-patterns" /> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <!-- ISO LET --> + <xsl:template match="iso:let" > + <xsl:if test="ancestor::iso:schema[@queryBinding='xpath']"> + <xsl:message>Warning: Variables should not be used with the "xpath" query language binding.</xsl:message> + </xsl:if> + + <!-- lets at the top-level are implemented as parameters --> + + <xsl:choose> + <xsl:when test="parent::iso:schema"> + <!-- it is an error to have an empty param/@select because an XPath is expected --> + <axsl:param name="{@name}" select="{@value}"> + <xsl:if test="string-length(@value) > 0"> + <xsl:attribute name="select"><xsl:value-of select="@value"/></xsl:attribute> + </xsl:if> + </axsl:param> + </xsl:when> + <xsl:otherwise> + <axsl:variable name="{@name}" select="{@value}"/> + </xsl:otherwise> + </xsl:choose> + + </xsl:template> + + <!-- ISO NAME --> + <xsl:template match="iso:name" mode="text"> + + <xsl:if test="@path"> + <xsl:call-template name="process-name"> + <xsl:with-param name="name" select="concat('name(',@path,')')"/> + </xsl:call-template> + </xsl:if> + <xsl:if test="not(@path)"> + <xsl:call-template name="process-name"> + <xsl:with-param name="name" select="'name(.)'"/> + </xsl:call-template> + </xsl:if> + <xsl:call-template name="IamEmpty" /> + </xsl:template> + + <!-- ISO NS --> + <!-- Namespace handling is XSLT is quite tricky and implementation dependent --> + <xsl:template match="iso:ns"> + <xsl:call-template name="handle-namespace" /> + </xsl:template> + + <!-- This template is just to provide the API hook --> + <xsl:template match="iso:ns" mode="do-all-patterns" > + <xsl:if test="not(@uri)"> + <xsl:message>Markup Error: no uri attribute in <ns></xsl:message> + </xsl:if> + <xsl:if test="not(@prefix)"> + <xsl:message>Markup Error: no prefix attribute in <ns></xsl:message> + </xsl:if> + <xsl:call-template name="IamEmpty" /> + <xsl:call-template name="process-ns" > + <xsl:with-param name="prefix" select="@prefix"/> + <xsl:with-param name="uri" select="@uri"/> + </xsl:call-template> + </xsl:template> + + <!-- ISO P --> + <xsl:template match="iso:schema/iso:p " mode="do-schema-p" > + <xsl:call-template name="process-p"> + <xsl:with-param name="class" select="@class"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + </xsl:call-template> + </xsl:template> + <xsl:template match="iso:pattern/iso:p " mode="do-pattern-p" > + <xsl:call-template name="process-p"> + <xsl:with-param name="class" select="@class"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + </xsl:call-template> + </xsl:template> + + <!-- Currently, iso:p in other position are not passed through to the API --> + <xsl:template match="iso:phase/iso:p" /> + <xsl:template match="iso:p " priority="-1" /> + + <!-- ISO PATTERN --> + <xsl:template match="iso:pattern" mode="do-all-patterns"> + <xsl:if test="($phase = '#ALL') + or (../iso:phase[@id= $phase]/iso:active[@pattern= current()/@id])"> + <xsl:call-template name="process-pattern"> + <!-- the following select statement assumes that + @id | sch:title returns node-set in document order: + we want the title if it is there, otherwise the @id attribute --> + <xsl:with-param name="name" select="(@id | iso:title )[last()]"/> + <xsl:with-param name="is-a" select="''"/> + + <!-- "Rich" properties --> + <xsl:with-param name="fpi" select="@fpi"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + <xsl:with-param name="see" select="@see" /> + <xsl:with-param name="space" select="@xml:space" /> + </xsl:call-template> + <xsl:choose> + <xsl:when test="$select-contexts='key'"> + <axsl:apply-templates select="key('M','M{count(preceding-sibling::*)}')" mode="M{count(preceding-sibling::*)}"/> + </xsl:when> + <xsl:when test="$select-contexts='//'"> + <axsl:apply-templates mode="M{count(preceding-sibling::*)}"> + <xsl:attribute name="select"> + <xsl:text>//(</xsl:text> + <xsl:for-each select="iso:rule/@context"> + <xsl:text>(</xsl:text> + <xsl:value-of select="."/> + <xsl:text>)</xsl:text> + <xsl:if test="position()!=last()">|</xsl:if> + </xsl:for-each> + <xsl:text>)</xsl:text> + <xsl:if test="$visit-text='false'">[not(self::text())]</xsl:if> + </xsl:attribute> + </axsl:apply-templates> + </xsl:when> + <xsl:otherwise> + <axsl:apply-templates select="/" mode="M{count(preceding-sibling::*)}"/> + </xsl:otherwise> + </xsl:choose> + </xsl:if> + </xsl:template> + + <xsl:template match="iso:pattern[@abstract='true']"> + + <xsl:message>Schema implementation error: This schema has abstract patterns, yet they are supposed to be preprocessed out already + </xsl:message> + </xsl:template> + + <!-- Here is the template for the normal case of patterns --> + <xsl:template match="iso:pattern[not(@abstract='true')]"> + + <xsl:if test="($phase = '#ALL') + or (../iso:phase[@id= $phase]/iso:active[@pattern= current()/@id])"> + + <xsl:text> </xsl:text> + <xsl:comment>PATTERN <xsl:value-of select="@id" /> <xsl:value-of select="iso:title" /> </xsl:comment><xsl:text> </xsl:text> + <xsl:apply-templates /> + + <!-- DPC select-contexts test --> + <xsl:if test="not($select-contexts)"> + <axsl:template match="text()" priority="-1" mode="M{count(preceding-sibling::*)}"> + <!-- strip characters --> + </axsl:template> + + <!-- DPC introduce context-xpath variable --> + <axsl:template match="@*|node()" + priority="-2" + mode="M{ count(preceding-sibling::*) }"> + <axsl:apply-templates select="{$context-xpath}" mode="M{count(preceding-sibling::*)}"/> + </axsl:template> + </xsl:if> + </xsl:if> + </xsl:template> + + <!-- ISO PHASE --> + <xsl:template match="iso:phase" > + <xsl:if test="not(@id)"> + <xsl:message>Markup Error: no id attribute in <phase></xsl:message> + </xsl:if> + <xsl:apply-templates/> + </xsl:template> + + <!-- ISO RULE --> + <xsl:template match="iso:rule[not(@abstract='true')] "> + <xsl:if test="not(@context)"> + <xsl:message>Markup Error: no context attribute in <rule></xsl:message> + </xsl:if> + <xsl:text> </xsl:text> + <xsl:comment>RULE <xsl:value-of select="@id" /> </xsl:comment><xsl:text> </xsl:text> + <xsl:if test="iso:title"> + <xsl:comment><xsl:value-of select="iso:title" /></xsl:comment> + </xsl:if> + <!-- DPC select-contexts --> + <xsl:if test="$select-contexts='key'"> + <axsl:key name="M" + match="{@context}" + use="'M{count(../preceding-sibling::*)}'"/> + </xsl:if> + + +<!-- DPC priorities count up from 1000 not down from 4000 (templates in same priority order as before) --> + <axsl:template match="{@context}" + priority="{1000 + count(following-sibling::*)}" mode="M{count(../preceding-sibling::*)}"> + <xsl:call-template name="process-rule"> + <xsl:with-param name="context" select="@context"/> + + <!-- "Rich" properties --> + <xsl:with-param name="fpi" select="@fpi"/> + <xsl:with-param name="icon" select="@icon"/> + <xsl:with-param name="id" select="@id"/> + <xsl:with-param name="lang" select="@xml:lang"/> + <xsl:with-param name="see" select="@see" /> + <xsl:with-param name="space" select="@xml:space" /> + + <!-- "Linking" properties --> + <xsl:with-param name="role" select="@role" /> + <xsl:with-param name="subject" select="@subject" /> + </xsl:call-template> + <xsl:apply-templates/> + <!-- DPC introduce context-xpath and select-contexts variables --> + <xsl:if test="not($select-contexts)"> + <axsl:apply-templates select="{$context-xpath}" mode="M{count(../preceding-sibling::*)}"/> + </xsl:if> + </axsl:template> + </xsl:template> + + + <!-- ISO ABSTRACT RULE --> + <xsl:template match="iso:rule[@abstract='true'] " > + <xsl:if test=" not(@id)"> + <xsl:message>Markup Error: no id attribute on abstract <rule></xsl:message> + </xsl:if> + <xsl:if test="@context"> + <xsl:message>Markup Error: (2) context attribute on abstract <rule></xsl:message> + </xsl:if> + </xsl:template> + + <xsl:template match="iso:rule[@abstract='true']" + mode="extends" > + <xsl:if test="@context"> + <xsl:message>Markup Error: context attribute on abstract <rule></xsl:message> + </xsl:if> + <xsl:apply-templates/> + </xsl:template> + + <!-- ISO SPAN --> + <xsl:template match="iso:span" mode="text"> + <xsl:call-template name="process-span"> + <xsl:with-param name="class" select="@class"/> + </xsl:call-template> + </xsl:template> + + <!-- ISO TITLE --> + + <xsl:template match="iso:schema/iso:title" priority="1"> + <xsl:call-template name="process-schema-title" /> + </xsl:template> + + + <xsl:template match="iso:title" > + <xsl:call-template name="process-title" /> + </xsl:template> + + + <!-- ISO VALUE-OF --> + <xsl:template match="iso:value-of" mode="text" > + <xsl:if test="not(@select)"> + <xsl:message>Markup Error: no select attribute in <value-of></xsl:message> + </xsl:if> + <xsl:call-template name="IamEmpty" /> + + <xsl:choose> + <xsl:when test="@select"> + <xsl:call-template name="process-value-of"> + <xsl:with-param name="select" select="@select"/> + </xsl:call-template> + </xsl:when> + <xsl:otherwise > + <xsl:call-template name="process-value-of"> + <xsl:with-param name="select" select="'.'"/> + </xsl:call-template> + </xsl:otherwise> + </xsl:choose> + + </xsl:template> + + +<!-- ============================================================== --> +<!-- DEFAULT TEXT HANDLING --> +<!-- ============================================================== --> + <xsl:template match="text()" priority="-1" mode="do-keys"> + <!-- strip characters --> + </xsl:template> + <xsl:template match="text()" priority="-1" mode="do-all-patterns"> + <!-- strip characters --> + </xsl:template> + <xsl:template match="text()" priority="-1" mode="do-schema-p"> + <!-- strip characters --> + </xsl:template> + <xsl:template match="text()" priority="-1" mode="do-pattern-p"> + <!-- strip characters --> + </xsl:template> + + <xsl:template match="text()" priority="-1"> + <!-- Strip characters --> + </xsl:template> + + <xsl:template match="text()" mode="text"> + <xsl:value-of select="."/> + </xsl:template> + + <xsl:template match="text()" mode="inline-text"> + <xsl:value-of select="."/> + </xsl:template> + +<!-- ============================================================== --> +<!-- UTILITY TEMPLATES --> +<!-- ============================================================== --> +<xsl:template name="IamEmpty"> + <xsl:if test="count( * )"> + <xsl:message> + <xsl:text>Warning: </xsl:text> + <xsl:value-of select="name(.)"/> + <xsl:text> must not contain any child elements</xsl:text> + </xsl:message> + </xsl:if> +</xsl:template> + +<xsl:template name="diagnosticsSplit"> + <!-- Process at the current point the first of the <diagnostic> elements + referred to parameter str, and then recurse --> + <xsl:param name="str"/> + <xsl:variable name="start"> + <xsl:choose> + <xsl:when test="contains($str,' ')"> + <xsl:value-of select="substring-before($str,' ')"/> + </xsl:when> + <xsl:otherwise><xsl:value-of select="$str"/></xsl:otherwise> + </xsl:choose> + </xsl:variable> + + <xsl:variable name="end"> + <xsl:if test="contains($str,' ')"> + <xsl:value-of select="substring-after($str,' ')"/> + </xsl:if> + </xsl:variable> + + <!-- This works with all namespaces --> + <xsl:if test="not(string-length(normalize-space($start)) = 0) + and not(//iso:diagnostic[@id = $start]) + and not(//sch:diagnostic[@id = $start]) + and not(//diagnostic[@id = $start])"> + <xsl:message>Reference error: A diagnostic "<xsl:value-of select="string($start)" + />" has been referenced but is not declared</xsl:message> + </xsl:if> + + <xsl:if test="string-length(normalize-space($start)) > 0"> + <xsl:text> </xsl:text> + <xsl:apply-templates + select="//iso:diagnostic[@id = $start ] + | //sch:diagnostic[@id = $start ] + | //diagnostic[@id= $start ]"/> + </xsl:if> + + <xsl:if test="not($end='')"> + <xsl:call-template name="diagnosticsSplit"> + <xsl:with-param name="str" select="$end"/> + </xsl:call-template> + </xsl:if> +</xsl:template> + +<!-- It would be nice to use this but xsl:namespace does not + allow a fallback --> +<!--xsl:template name="handle-namespace" version="2.0"> + <xsl:namespace name="{@prefix}" select="@uri"> +</xsl:template--> + +<xsl:template name="handle-namespace"> + <!-- experimental code from http://eccnet.eccnet.com/pipermail/schematron-love-in/2006-June/000104.html --> + <!-- Handle namespaces differently for exslt systems, msxml, and default, only using XSLT1 syntax --> + <!-- For more info see http://fgeorges.blogspot.com/2007/01/creating-namespace-nodes-in-xslt-10.html --> + <xsl:choose> + <!-- The following code works for XSLT1 --> + <xsl:when test="function-available('exsl:node-set')"> + <xsl:variable name="ns-dummy-elements"> + <xsl:element name="{@prefix}:dummy" namespace="{@uri}"/> + </xsl:variable> + <xsl:variable name="p" select="@prefix"/> + <xsl:copy-of select="exsl:node-set($ns-dummy-elements) + /*/namespace::*[local-name()=$p]"/> + </xsl:when> + + <!-- End XSLT1 code --> + + <!-- Not tested yet + <xsl:when test="function-available('msxsl:node-set')"> + <xsl:variable name="ns-dummy-elements"> + <xsl:element name="{ $prefix }:e" namespace="{ $uri }"/> + </xsl:variable> + <xsl:copy-of select="msxsl:node-set($ns-dummy-elements)/*/namespace::*"/> + </xsl:when> + --> + + <xsl:when test="@prefix = 'xsl' "> + <!-- Do not generate dummy attributes with the xsl: prefix, as these + are errors against XSLT, because we presume that the output + stylesheet uses the xsl prefix. In any case, there would already + be a namespace declaration for the XSLT namespace generated + automatically, presumably using "xsl:". + --> + </xsl:when> + + <xsl:when test="@uri = 'http://www.w3.org/1999/XSL/Transform'"> + <xsl:message terminate="yes"> + <xsl:text>Using the XSLT namespace with a prefix other than "xsl" in </xsl:text> + <xsl:text>Schematron rules is not supported </xsl:text> + <xsl:text>in this processor: </xsl:text> + <xsl:value-of select="system-property('xsl:vendor')"/> + </xsl:message> + </xsl:when> + + <xsl:otherwise> + <xsl:attribute name="{concat(@prefix,':dummy-for-xmlns')}" namespace="{@uri}" /> + + </xsl:otherwise> + </xsl:choose> + + +</xsl:template> + +<!-- ============================================================== --> +<!-- UNEXPECTED ELEMENTS --> +<!-- ============================================================== --> + + <xsl:template match="iso:*" priority="-2"> + <xsl:message> + <xsl:text>Error: unrecognized element in ISO Schematron namespace: check spelling + and capitalization</xsl:text> + <xsl:value-of select="name(.)"/> + </xsl:message> + </xsl:template> + + + <!-- Swallow old namespace elements: there is an upfront test for them elsewhere --> + <xsl:template match="sch:*" priority="-2" /> + + <xsl:template match="*" priority="-3"> + <xsl:choose> + <xsl:when test=" $allow-foreign = 'false' "> + <xsl:message> + <xsl:text>Warning: unrecognized element </xsl:text> + <xsl:value-of select="name(.)"/> + </xsl:message> + </xsl:when> + <xsl:otherwise> + <xsl:copy-of select="." /> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + + <xsl:template match="iso:*" mode="text" priority="-2" /> + <xsl:template match="*" mode="text" priority="-3"> + <xsl:choose> + <xsl:when test=" $allow-foreign = 'false' "> + <xsl:message> + <xsl:text>Warning: unrecognized element </xsl:text> + <xsl:value-of select="name(.)"/> + </xsl:message> + </xsl:when> + <xsl:otherwise> + <xsl:copy-of select="." /> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + +<!-- ============================================================== --> +<!-- DEFAULT NAMED TEMPLATES --> +<!-- These are the actions that are performed unless overridden --> +<!-- ============================================================== --> + + <xsl:template name="process-prolog"/> + <!-- no params --> + + <xsl:template name="process-root"> + <xsl:param name="contents"/> + <xsl:param name="id" /> + <xsl:param name="version" /> + <xsl:param name="schemaVersion" /> + <xsl:param name="queryBinding" /> + <xsl:param name="title" /> + + + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + + <xsl:copy-of select="$contents"/> + </xsl:template> + + <xsl:template name="process-assert"> + + <xsl:param name="test"/> + <xsl:param name="diagnostics" /> + <xsl:param name="id" /> + <xsl:param name="flag" /> + + <!-- "Linkable" parameters --> + <xsl:param name="role"/> + <xsl:param name="subject"/> + + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + + + <xsl:call-template name="process-message"> + <xsl:with-param name="pattern" select="$test"/> + <xsl:with-param name="role" select="$role"/> + </xsl:call-template> + + + </xsl:template> + + <xsl:template name="process-report"> + <xsl:param name="test"/> + <xsl:param name="diagnostics" /> + <xsl:param name="id" /> + <xsl:param name="flag" /> + + <!-- "Linkable" parameters --> + <xsl:param name="role"/> + <xsl:param name="subject"/> + + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + + <xsl:call-template name="process-message"> + <xsl:with-param name="pattern" select="$test"/> + <xsl:with-param name="role" select="$role"/> + </xsl:call-template> + </xsl:template> + + <xsl:template name="process-diagnostic"> + <xsl:param name="id" /> + + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <xsl:apply-templates mode="text"/> + <axsl:text> </axsl:text> + </xsl:template> + + <xsl:template name="process-dir"> + <xsl:param name="value" /> + + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <xsl:apply-templates mode="inline-text"/> + <axsl:text> </axsl:text> + </xsl:template> + + <xsl:template name="process-emph"> + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <xsl:apply-templates mode="inline-text"/> + <axsl:text> </axsl:text> + </xsl:template> + + <xsl:template name="process-name"> + <xsl:param name="name"/> + + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <axsl:value-of select="{$name}"/> + <axsl:text> </axsl:text> + + </xsl:template> + + <xsl:template name="process-ns" > + <!-- Note that process-ns is for reporting. The sch:ns elements are + independently used in the sch:schema template to provide namespace bindings --> + <xsl:param name="prefix"/> + <xsl:param name="uri" /> + </xsl:template> + + <xsl:template name="process-p"> + <xsl:param name="id" /> + <xsl:param name="class" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + </xsl:template> + + <xsl:template name="process-pattern"> + <xsl:param name="id" /> + <xsl:param name="name" /> + <xsl:param name="is-a" /> + + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + </xsl:template> + + + <xsl:template name="process-rule"> + <xsl:param name="context" /> + + <xsl:param name="id" /> + <xsl:param name="flag" /> + + <!-- "Linkable" parameters --> + <xsl:param name="role"/> + <xsl:param name="subject"/> + + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + </xsl:template> + + <xsl:template name="process-span" > + <xsl:param name="class" /> + + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <xsl:apply-templates mode="inline-text"/> + <axsl:text> </axsl:text> + </xsl:template> + + <xsl:template name="process-title" > + <xsl:param name="class" /> + <xsl:call-template name="process-p"> + <xsl:with-param name="class">title</xsl:with-param> + </xsl:call-template> + </xsl:template> + + <xsl:template name="process-schema-title" > + <xsl:param name="class" /> + <xsl:call-template name="process-title"> + <xsl:with-param name="class">schema-title</xsl:with-param> + </xsl:call-template> + </xsl:template> + + <xsl:template name="process-value-of"> + <xsl:param name="select"/> + + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <axsl:value-of select="{$select}"/> + <axsl:text> </axsl:text> + </xsl:template> + + <!-- default output action: the simplest customization is to just override this --> + <xsl:template name="process-message"> + <xsl:param name="pattern" /> + <xsl:param name="role" /> + + <xsl:apply-templates mode="text"/> + <xsl:if test=" $message-newline = 'true'" > + <axsl:value-of select="string(' ')"/> + </xsl:if> + + </xsl:template> +</xsl:stylesheet> + + + diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl new file mode 100644 index 00000000..dae74ff6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl @@ -0,0 +1,588 @@ +<?xml version="1.0" ?> +<!-- + ISO_SVRL.xsl + + Implementation of Schematron Validation Report Language from ISO Schematron + ISO/IEC 19757 Document Schema Definition Languages (DSDL) + Part 3: Rule-based validation Schematron + Annex D: Schematron Validation Report Language + + This ISO Standard is available free as a Publicly Available Specification in PDF from ISO. + Also see www.schematron.com for drafts and other information. + + This implementation of SVRL is designed to run with the "Skeleton" implementation + of Schematron which Oliver Becker devised. The skeleton code provides a + Schematron implementation but with named templates for handling all output; + the skeleton provides basic templates for output using this API, but client + validators can be written to import the skeleton and override the default output + templates as required. (In order to understand this, you must understand that + a named template such as "process-assert" in this XSLT stylesheet overrides and + replaces any template with the same name in the imported skeleton XSLT file.) + + The other important thing to understand in this code is that there are different + versions of the Schematron skeleton. These track the development of Schematron through + Schematron 1.5, Schematron 1.6 and now ISO Schematron. One only skeleton must be + imported. The code has templates for the different skeletons commented out for + convenience. ISO Schematron has a different namespace than Schematron 1.5 and 1.6; + so the ISO Schematron skeleton has been written itself with an optional import + statement to in turn import the Schematron 1.6 skeleton. This will allow you to + validate with schemas from either namespace. + + + History: + 2009-03-18 + * Fix atrribute with space "see " which generates wrong name in some processors + 2008-08-11 + * RJ Fix attribute/@select which saxon allows in XSLT 1 + 2008-08-07 + * RJ Add output-encoding attribute to specify final encoding to use + * Alter allow-foreign functionality so that Schematron span, emph and dir elements make + it to the output, for better formatting and because span can be used to mark up + semantically interesting information embedded in diagnostics, which reduces the + need to extend SVRL itself + * Diagnostic-reference had an invalid attribute @id that duplicated @diagnostic: removed + 2008-08-06 + * RJ Fix invalid output: svrl:diagnostic-reference is not contained in an svrl:text + * Output comment to SVRL file giving filename if available (from command-line parameter) + 2008-08-04 + * RJ move sch: prefix to schold: prefix to prevent confusion (we want people to + be able to switch from old namespace to new namespace without changing the + sch: prefix, so it is better to keep that prefix completely out of the XSLT) + * Extra signature fixes (PH) + 2008-08-03 + * Repair missing class parameter on process-p + 2008-07-31 + * Update skeleton names + 2007-04-03 + * Add option generate-fired-rule (RG) + 2007-02-07 + * Prefer true|false for parameters. But allow yes|no on some old for compatability + * DP Diagnostics output to svrl:text. Diagnosis put out after assertion text. + * Removed non-SVRL elements and attributes: better handled as an extra layer that invokes this one + * Add more formal parameters + * Correct confusion between $schemaVersion and $queryBinding + * Indent + * Validate against RNC schemas for XSLT 1 and 2 (with regex tests removed) + * Validate output with UniversalTest.sch against RNC schema for ISO SVRL + + 2007-02-01 + * DP. Update formal parameters of overriding named templates to handle more attributes. + * DP. Refactor handling of rich and linkable parameters to a named template. + + 2007-01-22 + * DP change svrl:ns to svrl:ns-in-attribute-value + * Change default when no queryBinding from "unknown" to "xslt" + + 2007-01-18: + * Improve documentation + * KH Add command-line options to generate paths or not + * Use axsl:attribute rather than xsl:attribute to shut XSLT2 up + * Add extra command-line options to pass to the iso_schematron_skeleton + + 2006-12-01: iso_svrl.xsl Rick Jelliffe, + * update namespace, + * update phase handling, + * add flag param to process-assert and process-report & @ flag on output + + 2001: Conformance1-5.xsl Rick Jelliffe, + * Created, using the skeleton code contributed by Oliver Becker +--> +<!-- + Derived from Conformance1-5.xsl. + + Copyright (c) 2001, 2006 Rick Jelliffe and Academia Sinica Computing Center, Taiwan + + This software is provided 'as-is', without any express or implied warranty. + In no event will the authors be held liable for any damages arising from + the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it freely, + subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not claim + that you wrote the original software. If you use this software in a product, + an acknowledgment in the product documentation would be appreciated but is + not required. + + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source distribution. +--> + +<!-- Ideas nabbed from schematrons by Francis N., Miloslav N. and David C. --> + +<!-- The command-line parameters are: + phase NMTOKEN | "#ALL" (default) Select the phase for validation + allow-foreign "true" | "false" (default) Pass non-Schematron elements and rich markup to the generated stylesheet + diagnose= true | false|yes|no Add the diagnostics to the assertion test in reports (yes|no are obsolete) + generate-paths=true|false|yes|no generate the @location attribute with XPaths (yes|no are obsolete) + sch.exslt.imports semi-colon delimited string of filenames for some EXSLT implementations + optimize "visit-no-attributes" Use only when the schema has no attributes as the context nodes + generate-fired-rule "true"(default) | "false" Generate fired-rule elements + +--> + +<xsl:stylesheet + version="1.0" + xmlns:xsl="http://www.w3.org/1999/XSL/Transform" + xmlns:xs="http://www.w3.org/2001/XMLSchema" + xmlns:axsl="http://www.w3.org/1999/XSL/TransformAlias" + xmlns:schold="http://www.ascc.net/xml/schematron" + xmlns:iso="http://purl.oclc.org/dsdl/schematron" + xmlns:svrl="http://purl.oclc.org/dsdl/svrl" +> + +<!-- Select the import statement and adjust the path as + necessary for your system. + If not XSLT2 then also remove svrl:active-pattern/@document="{document-uri()}" from process-pattern() +--> +<!-- +<xsl:import href="iso_schematron_skeleton_for_saxon.xsl"/> +--> + +<xsl:import href="iso_schematron_skeleton_for_xslt1.xsl"/> + <!-- +<xsl:import href="iso_schematron_skeleton.xsl"/> +<xsl:import href="skeleton1-5.xsl"/> +<xsl:import href="skeleton1-6.xsl"/> +--> + +<xsl:param name="diagnose" >true</xsl:param> +<xsl:param name="phase" > + <xsl:choose> + <!-- Handle Schematron 1.5 and 1.6 phases --> + <xsl:when test="//schold:schema/@defaultPhase"> + <xsl:value-of select="//schold:schema/@defaultPhase"/> + </xsl:when> + <!-- Handle ISO Schematron phases --> + <xsl:when test="//iso:schema/@defaultPhase"> + <xsl:value-of select="//iso:schema/@defaultPhase"/> + </xsl:when> + <xsl:otherwise>#ALL</xsl:otherwise> + </xsl:choose> +</xsl:param> +<xsl:param name="allow-foreign" >false</xsl:param> +<xsl:param name="generate-paths" >true</xsl:param> +<xsl:param name="generate-fired-rule" >true</xsl:param> +<xsl:param name="optimize"/> + +<xsl:param name="output-encoding" ></xsl:param> + +<!-- e.g. saxon file.xml file.xsl "sch.exslt.imports=.../string.xsl;.../math.xsl" --> +<xsl:param name="sch.exslt.imports" /> + + + +<!-- Experimental: If this file called, then must be generating svrl --> +<xsl:variable name="svrlTest" select="true()" /> + + + +<!-- ================================================================ --> + +<xsl:template name="process-prolog"> + <axsl:output method="xml" omit-xml-declaration="no" standalone="yes" + indent="yes"> + <xsl:if test=" string-length($output-encoding) > 0"> + <xsl:attribute name="encoding"><xsl:value-of select=" $output-encoding" /></xsl:attribute> + </xsl:if> + </axsl:output> + +</xsl:template> + +<!-- Overrides skeleton.xsl --> +<xsl:template name="process-root"> + <xsl:param name="title"/> + <xsl:param name="contents" /> + <xsl:param name="queryBinding" >xslt1</xsl:param> + <xsl:param name="schemaVersion" /> + <xsl:param name="id" /> + <xsl:param name="version"/> + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + + <svrl:schematron-output title="{$title}" schemaVersion="{$schemaVersion}" > + <xsl:if test=" string-length( normalize-space( $phase )) > 0 and + not( normalize-space( $phase ) = '#ALL') "> + <axsl:attribute name="phase"> + <xsl:value-of select=" $phase " /> + </axsl:attribute> + </xsl:if> + <xsl:if test=" $allow-foreign = 'true'"> + </xsl:if> + <xsl:if test=" $allow-foreign = 'true'"> + + <xsl:call-template name='richParms'> + <xsl:with-param name="fpi" select="$fpi" /> + <xsl:with-param name="icon" select="$icon"/> + <xsl:with-param name="lang" select="$lang"/> + <xsl:with-param name="see" select="$see" /> + <xsl:with-param name="space" select="$space" /> + </xsl:call-template> + </xsl:if> + + <axsl:comment><axsl:value-of select="$archiveDirParameter"/>   + <axsl:value-of select="$archiveNameParameter"/>   + <axsl:value-of select="$fileNameParameter"/>   + <axsl:value-of select="$fileDirParameter"/></axsl:comment> + + + <xsl:apply-templates mode="do-schema-p" /> + <xsl:copy-of select="$contents" /> + </svrl:schematron-output> +</xsl:template> + + +<xsl:template name="process-assert"> + <xsl:param name="test"/> + <xsl:param name="diagnostics" /> + <xsl:param name="id" /> + <xsl:param name="flag" /> + <!-- "Linkable" parameters --> + <xsl:param name="role"/> + <xsl:param name="subject"/> + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + <svrl:failed-assert test="{$test}" > + <xsl:if test="string-length( $id ) > 0"> + <axsl:attribute name="id"> + <xsl:value-of select=" $id " /> + </axsl:attribute> + </xsl:if> + <xsl:if test=" string-length( $flag ) > 0"> + <axsl:attribute name="flag"> + <xsl:value-of select=" $flag " /> + </axsl:attribute> + </xsl:if> + <!-- Process rich attributes. --> + <xsl:call-template name="richParms"> + <xsl:with-param name="fpi" select="$fpi"/> + <xsl:with-param name="icon" select="$icon"/> + <xsl:with-param name="lang" select="$lang"/> + <xsl:with-param name="see" select="$see" /> + <xsl:with-param name="space" select="$space" /> + </xsl:call-template> + <xsl:call-template name='linkableParms'> + <xsl:with-param name="role" select="$role" /> + <xsl:with-param name="subject" select="$subject"/> + </xsl:call-template> + <xsl:if test=" $generate-paths = 'true' or $generate-paths= 'yes' "> + <!-- true/false is the new way --> + <axsl:attribute name="location"> + <axsl:apply-templates select="." mode="schematron-get-full-path"/> + </axsl:attribute> + </xsl:if> + + <svrl:text> + <xsl:apply-templates mode="text" /> + + </svrl:text> + <xsl:if test="$diagnose = 'yes' or $diagnose= 'true' "> + <!-- true/false is the new way --> + <xsl:call-template name="diagnosticsSplit"> + <xsl:with-param name="str" select="$diagnostics"/> + </xsl:call-template> + </xsl:if> + </svrl:failed-assert> +</xsl:template> + +<xsl:template name="process-report"> + <xsl:param name="id"/> + <xsl:param name="test"/> + <xsl:param name="diagnostics"/> + <xsl:param name="flag" /> + <!-- "Linkable" parameters --> + <xsl:param name="role"/> + <xsl:param name="subject"/> + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + <svrl:successful-report test="{$test}" > + <xsl:if test=" string-length( $id ) > 0"> + <axsl:attribute name="id"> + <xsl:value-of select=" $id " /> + </axsl:attribute> + </xsl:if> + <xsl:if test=" string-length( $flag ) > 0"> + <axsl:attribute name="flag"> + <xsl:value-of select=" $flag " /> + </axsl:attribute> + </xsl:if> + + <!-- Process rich attributes. --> + <xsl:call-template name="richParms"> + <xsl:with-param name="fpi" select="$fpi"/> + <xsl:with-param name="icon" select="$icon"/> + <xsl:with-param name="lang" select="$lang"/> + <xsl:with-param name="see" select="$see" /> + <xsl:with-param name="space" select="$space" /> + </xsl:call-template> + <xsl:call-template name='linkableParms'> + <xsl:with-param name="role" select="$role" /> + <xsl:with-param name="subject" select="$subject"/> + </xsl:call-template> + <xsl:if test=" $generate-paths = 'yes' or $generate-paths = 'true' "> + <!-- true/false is the new way --> + <axsl:attribute name="location"> + <axsl:apply-templates select="." mode="schematron-get-full-path"/> + </axsl:attribute> + </xsl:if> + + <svrl:text> + <xsl:apply-templates mode="text" /> + + </svrl:text> + <xsl:if test="$diagnose = 'yes' or $diagnose='true' "> + <!-- true/false is the new way --> + <xsl:call-template name="diagnosticsSplit"> + <xsl:with-param name="str" select="$diagnostics"/> + </xsl:call-template> + </xsl:if> + </svrl:successful-report> +</xsl:template> + + + <!-- Overrides skeleton --> + <xsl:template name="process-dir" > + <xsl:param name="value" /> + <xsl:choose> + <xsl:when test=" $allow-foreign = 'true'"> + <xsl:copy-of select="."/> + </xsl:when> + + <xsl:otherwise> + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <xsl:apply-templates mode="inline-text"/> + <axsl:text> </axsl:text> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + +<xsl:template name="process-diagnostic"> + <xsl:param name="id"/> + <!-- Rich parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + <svrl:diagnostic-reference diagnostic="{$id}" > + + <xsl:call-template name="richParms"> + <xsl:with-param name="fpi" select="$fpi"/> + <xsl:with-param name="icon" select="$icon"/> + <xsl:with-param name="lang" select="$lang"/> + <xsl:with-param name="see" select="$see" /> + <xsl:with-param name="space" select="$space" /> + </xsl:call-template> +<xsl:text> +</xsl:text> + + <xsl:apply-templates mode="text"/> + + </svrl:diagnostic-reference> +</xsl:template> + + + <!-- Overrides skeleton --> + <xsl:template name="process-emph" > + <xsl:param name="class" /> + <xsl:choose> + <xsl:when test=" $allow-foreign = 'true'"> + <xsl:copy-of select="."/> + </xsl:when> + <xsl:otherwise> + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <xsl:apply-templates mode="inline-text"/> + <axsl:text> </axsl:text> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + +<xsl:template name="process-rule"> + <xsl:param name="id"/> + <xsl:param name="context"/> + <xsl:param name="flag"/> + <!-- "Linkable" parameters --> + <xsl:param name="role"/> + <xsl:param name="subject"/> + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + <xsl:if test=" $generate-fired-rule = 'true'"> + <svrl:fired-rule context="{$context}" > + <!-- Process rich attributes. --> + <xsl:call-template name="richParms"> + <xsl:with-param name="fpi" select="$fpi"/> + <xsl:with-param name="icon" select="$icon"/> + <xsl:with-param name="lang" select="$lang"/> + <xsl:with-param name="see" select="$see" /> + <xsl:with-param name="space" select="$space" /> + </xsl:call-template> + <xsl:if test=" string( $id )"> + <xsl:attribute name="id"> + <xsl:value-of select=" $id " /> + </xsl:attribute> + </xsl:if> + <xsl:if test=" string-length( $role ) > 0"> + <xsl:attribute name="role"> + <xsl:value-of select=" $role " /> + </xsl:attribute> + </xsl:if> + </svrl:fired-rule> +</xsl:if> +</xsl:template> + +<xsl:template name="process-ns"> + <xsl:param name="prefix"/> + <xsl:param name="uri"/> + <svrl:ns-prefix-in-attribute-values uri="{$uri}" prefix="{$prefix}" /> +</xsl:template> + +<xsl:template name="process-p"> + <xsl:param name="icon"/> + <xsl:param name="class"/> + <xsl:param name="id"/> + <xsl:param name="lang"/> + + <svrl:text> + <xsl:apply-templates mode="text"/> + </svrl:text> +</xsl:template> + +<xsl:template name="process-pattern"> + <xsl:param name="name"/> + <xsl:param name="id"/> + <xsl:param name="is-a"/> + + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + <svrl:active-pattern > + <xsl:if test=" string( $id )"> + <axsl:attribute name="id"> + <xsl:value-of select=" $id " /> + </axsl:attribute> + </xsl:if> + <xsl:if test=" string( $name )"> + <axsl:attribute name="name"> + <xsl:value-of select=" $name " /> + </axsl:attribute> + </xsl:if> + + <xsl:call-template name='richParms'> + <xsl:with-param name="fpi" select="$fpi"/> + <xsl:with-param name="icon" select="$icon"/> + <xsl:with-param name="lang" select="$lang"/> + <xsl:with-param name="see" select="$see" /> + <xsl:with-param name="space" select="$space" /> + </xsl:call-template> + + <!-- ?? report that this screws up iso:title processing --> + <xsl:apply-templates mode="do-pattern-p"/> + <!-- ?? Seems that this apply-templates is never triggered DP --> + <axsl:apply-templates /> + </svrl:active-pattern> +</xsl:template> + +<!-- Overrides skeleton --> +<xsl:template name="process-message" > + <xsl:param name="pattern"/> + <xsl:param name="role"/> +</xsl:template> + + + <!-- Overrides skeleton --> + <xsl:template name="process-span" > + <xsl:param name="class" /> + <xsl:choose> + <xsl:when test=" $allow-foreign = 'true'"> + <xsl:copy-of select="."/> + </xsl:when> + <xsl:otherwise> + <!-- We generate too much whitespace rather than risking concatenation --> + <axsl:text> </axsl:text> + <xsl:apply-templates mode="inline-text"/> + <axsl:text> </axsl:text> + </xsl:otherwise> + </xsl:choose> + </xsl:template> + +<!-- =========================================================================== --> +<!-- processing rich parameters. --> +<xsl:template name='richParms'> + <!-- "Rich" parameters --> + <xsl:param name="fpi" /> + <xsl:param name="icon" /> + <xsl:param name="lang" /> + <xsl:param name="see" /> + <xsl:param name="space" /> + <!-- Process rich attributes. --> + <xsl:if test=" $allow-foreign = 'true'"> + <xsl:if test="string($fpi)"> + <axsl:attribute name="fpi"> + <xsl:value-of select="$fpi"/> + </axsl:attribute> + </xsl:if> + <xsl:if test="string($icon)"> + <axsl:attribute name="icon"> + <xsl:value-of select="$icon"/> + </axsl:attribute> + </xsl:if> + <xsl:if test="string($see)"> + <axsl:attribute name="see"> + <xsl:value-of select="$see"/> + </axsl:attribute> + </xsl:if> + </xsl:if> + <xsl:if test="string($space)"> + <axsl:attribute name="xml:space"> + <xsl:value-of select="$space"/> + </axsl:attribute> + </xsl:if> + <xsl:if test="string($lang)"> + <axsl:attribute name="xml:lang"> + <xsl:value-of select="$lang"/> + </axsl:attribute> + </xsl:if> +</xsl:template> + +<!-- processing linkable parameters. --> +<xsl:template name='linkableParms'> + <xsl:param name="role"/> + <xsl:param name="subject"/> + + <!-- ISO SVRL has a role attribute to match the Schematron role attribute --> + <xsl:if test=" string($role )"> + <axsl:attribute name="role"> + <xsl:value-of select=" $role " /> + </axsl:attribute> + </xsl:if> + <!-- ISO SVRL does not have a subject attribute to match the Schematron subject attribute. + Instead, the Schematron subject attribute is folded into the location attribute --> +</xsl:template> + + +</xsl:stylesheet> + diff --git a/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt new file mode 100644 index 00000000..e5d6dfcd --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt @@ -0,0 +1,84 @@ +ISO SCHEMATRON 2010
+
+XSLT implementation by Rick Jelliffe with assistance from members of Schematron-love-in maillist.
+
+2010-04-21
+
+Two distributions are available. One is for XSLT1 engines.
+The other is for XSLT2 engines, such as SAXON 9.
+
+
+This version of Schematron splits the process into a pipeline of several different XSLT stages.
+
+1) First, preprocess your Schematron schema with iso_dsdl_include.xsl.
+This is a macro processor to assemble the schema from various parts.
+If your schema is not in separate parts, you can skip this stage.
+This stage also generates error messages for some common XPath syntax problems.
+
+2) Second, preprocess the output from stage 1 with iso_abstract_expand.xsl.
+This is a macro processor to convert abstract patterns to real patterns.
+If your schema does not use abstract patterns, you can skip this
+stage.
+
+3) Third, compile the Schematron schema into an XSLT script.
+This will typically use iso_svrl_for_xslt1.xsl or iso_svrl_for_xslt2.xsl
+(which in turn invoke iso_schematron_skeleton_for_xslt1.xsl or iso_schematron_skeleton_for_saxon.xsl)
+However, other "meta-stylesheets" are also in common use; the principle of operation is the same.
+If your schema uses Schematron phases, supply these as command line/invocation parameters
+to this process.
+
+4) Fourth, run the script generated by stage 3 against the document being validated.
+If you are using the SVRL script, then the output of validation will be an XML document.
+If your schema uses Schematron parameters, supply these as command line/invocation parameters
+to this process.
+
+
+The XSLT2 distribution also features several next generation features,
+such as validating multiple documents. See the source code for details.
+
+Schematron assertions can be written in any language, of course; the file
+sch-messages-en.xhtml contains the diagnostics messages from the XSLT2 skeleton
+in English, and this can be used as template to localize the skeleton's
+error messages. Note that typically programming errors in Schematron are XPath
+errors, which requires localized messages from the XSLT engine.
+
+ANT
+---
+To give an example of how to process a document, here is a sample ANT task.
+
+<target name="schematron-compile-test" >
+
+ <!-- expand inclusions -->
+ <xslt basedir="test/schematron"
+ style="iso_dsdl_include.xsl" in="test.sch" out="test1.sch">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+
+ <!-- expand abstract patterns -->
+ <xslt basedir="test/schematron"
+ style="iso_abstract_expand.xsl" in="test1.sch" out="test2.sch">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+
+
+
+ <!-- compile it -->
+ <xslt basedir="test/schematron"
+ style="iso_svrl_for_xslt2.xsl" in="test2.sch" out="test.xsl">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+
+ <!-- validate -->
+ <xslt basedir="test/schematron"
+ style="test.xsl" in="instance.xml" out="instance.svrlt">
+ <classpath>
+ <pathelement location="${lib.dir}/saxon9.jar"/>
+ </classpath>
+ </xslt>
+ </target>
diff --git a/.venv/lib/python3.12/site-packages/lxml/iterparse.pxi b/.venv/lib/python3.12/site-packages/lxml/iterparse.pxi new file mode 100644 index 00000000..42b75249 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/iterparse.pxi @@ -0,0 +1,438 @@ +# iterparse -- event-driven parsing + +DEF __ITERPARSE_CHUNK_SIZE = 32768 + +cdef class iterparse: + """iterparse(self, source, events=("end",), tag=None, \ + attribute_defaults=False, dtd_validation=False, \ + load_dtd=False, no_network=True, remove_blank_text=False, \ + remove_comments=False, remove_pis=False, encoding=None, \ + html=False, recover=None, huge_tree=False, schema=None) + + Incremental parser. + + Parses XML into a tree and generates tuples (event, element) in a + SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns', + 'end-ns'. + + For 'start' and 'end', ``element`` is the Element that the parser just + found opening or closing. For 'start-ns', it is a tuple (prefix, URI) of + a new namespace declaration. For 'end-ns', it is simply None. Note that + all start and end events are guaranteed to be properly nested. + + The keyword argument ``events`` specifies a sequence of event type names + that should be generated. By default, only 'end' events will be + generated. + + The additional ``tag`` argument restricts the 'start' and 'end' events to + those elements that match the given tag. The ``tag`` argument can also be + a sequence of tags to allow matching more than one tag. By default, + events are generated for all elements. Note that the 'start-ns' and + 'end-ns' events are not impacted by this restriction. + + The other keyword arguments in the constructor are mainly based on the + libxml2 parser configuration. A DTD will also be loaded if validation or + attribute default values are requested. + + Available boolean keyword arguments: + - attribute_defaults: read default attributes from DTD + - dtd_validation: validate (if DTD is available) + - load_dtd: use DTD for parsing + - no_network: prevent network access for related files + - remove_blank_text: discard blank text nodes + - remove_comments: discard comments + - remove_pis: discard processing instructions + - strip_cdata: replace CDATA sections by normal text content (default: + True for XML, ignored otherwise) + - compact: safe memory for short text content (default: True) + - resolve_entities: replace entities by their text value (default: True) + - huge_tree: disable security restrictions and support very deep trees + and very long text content (only affects libxml2 2.7+) + - html: parse input as HTML (default: XML) + - recover: try hard to parse through broken input (default: True for HTML, + False otherwise) + + Other keyword arguments: + - encoding: override the document encoding + - schema: an XMLSchema to validate against + """ + cdef _FeedParser _parser + cdef object _tag + cdef object _events + cdef readonly object root + cdef object _source + cdef object _filename + cdef object _error + cdef bint _close_source_after_read + + def __init__(self, source, events=("end",), *, tag=None, + attribute_defaults=False, dtd_validation=False, + load_dtd=False, no_network=True, remove_blank_text=False, + compact=True, resolve_entities=True, remove_comments=False, + remove_pis=False, strip_cdata=True, encoding=None, + html=False, recover=None, huge_tree=False, collect_ids=True, + XMLSchema schema=None): + if not hasattr(source, 'read'): + source = _getFSPathOrObject(source) + self._filename = source + self._source = open(source, 'rb') + self._close_source_after_read = True + else: + self._filename = _getFilenameForFile(source) + self._source = source + self._close_source_after_read = False + + if recover is None: + recover = html + + if html: + # make sure we're not looking for namespaces + events = [event for event in events + if event not in ('start-ns', 'end-ns')] + parser = HTMLPullParser( + events, + tag=tag, + recover=recover, + base_url=self._filename, + encoding=encoding, + remove_blank_text=remove_blank_text, + remove_comments=remove_comments, + remove_pis=remove_pis, + no_network=no_network, + target=None, # TODO + schema=schema, + compact=compact) + else: + parser = XMLPullParser( + events, + tag=tag, + recover=recover, + base_url=self._filename, + encoding=encoding, + attribute_defaults=attribute_defaults, + dtd_validation=dtd_validation, + load_dtd=load_dtd, + no_network=no_network, + schema=schema, + huge_tree=huge_tree, + remove_blank_text=remove_blank_text, + resolve_entities=resolve_entities, + remove_comments=remove_comments, + remove_pis=remove_pis, + strip_cdata=strip_cdata, + collect_ids=True, + target=None, # TODO + compact=compact) + + self._events = parser.read_events() + self._parser = parser + + @property + def error_log(self): + """The error log of the last (or current) parser run. + """ + return self._parser.feed_error_log + + @property + def resolvers(self): + """The custom resolver registry of the last (or current) parser run. + """ + return self._parser.resolvers + + @property + def version(self): + """The version of the underlying XML parser.""" + return self._parser.version + + def set_element_class_lookup(self, ElementClassLookup lookup = None): + """set_element_class_lookup(self, lookup = None) + + Set a lookup scheme for element classes generated from this parser. + + Reset it by passing None or nothing. + """ + self._parser.set_element_class_lookup(lookup) + + def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): + """makeelement(self, _tag, attrib=None, nsmap=None, **_extra) + + Creates a new element associated with this parser. + """ + self._parser.makeelement( + _tag, attrib=None, nsmap=None, **_extra) + + @cython.final + cdef _close_source(self): + if self._source is None: + return + if not self._close_source_after_read: + self._source = None + return + try: + close = self._source.close + except AttributeError: + close = None + finally: + self._source = None + if close is not None: + close() + + def __iter__(self): + return self + + def __next__(self): + try: + return next(self._events) + except StopIteration: + pass + context = <_SaxParserContext>self._parser._getPushParserContext() + if self._source is not None: + done = False + while not done: + try: + done = self._read_more_events(context) + return next(self._events) + except StopIteration: + pass # no events yet + except Exception as e: + self._error = e + self._close_source() + try: + return next(self._events) + except StopIteration: + break + # nothing left to read or return + if self._error is not None: + error = self._error + self._error = None + raise error + if (context._validator is not None + and not context._validator.isvalid()): + _raiseParseError(context._c_ctxt, self._filename, + context._error_log) + # no errors => all done + raise StopIteration + + @cython.final + cdef bint _read_more_events(self, _SaxParserContext context) except -123: + data = self._source.read(__ITERPARSE_CHUNK_SIZE) + if not isinstance(data, bytes): + self._close_source() + raise TypeError("reading file objects must return bytes objects") + if not data: + try: + self.root = self._parser.close() + finally: + self._close_source() + return True + self._parser.feed(data) + return False + + +cdef enum _IterwalkSkipStates: + IWSKIP_NEXT_IS_START + IWSKIP_SKIP_NEXT + IWSKIP_CAN_SKIP + IWSKIP_CANNOT_SKIP + + +cdef class iterwalk: + """iterwalk(self, element_or_tree, events=("end",), tag=None) + + A tree walker that generates events from an existing tree as if it + was parsing XML data with ``iterparse()``. + + Just as for ``iterparse()``, the ``tag`` argument can be a single tag or a + sequence of tags. + + After receiving a 'start' or 'start-ns' event, the children and + descendants of the current element can be excluded from iteration + by calling the ``skip_subtree()`` method. + """ + cdef _MultiTagMatcher _matcher + cdef list _node_stack + cdef list _events + cdef object _pop_event + cdef object _include_siblings + cdef int _index + cdef int _event_filter + cdef _IterwalkSkipStates _skip_state + + def __init__(self, element_or_tree, events=("end",), tag=None): + cdef _Element root + cdef int ns_count + root = _rootNodeOrRaise(element_or_tree) + self._event_filter = _buildParseEventFilter(events) + if tag is None or tag == '*': + self._matcher = None + else: + self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag) + self._node_stack = [] + self._events = [] + self._pop_event = self._events.pop + self._skip_state = IWSKIP_CANNOT_SKIP # ignore all skip requests by default + + if self._event_filter: + self._index = 0 + if self._matcher is not None and self._event_filter & PARSE_EVENT_FILTER_START: + self._matcher.cacheTags(root._doc) + + # When processing an ElementTree, add events for the preceding comments/PIs. + if self._event_filter & (PARSE_EVENT_FILTER_COMMENT | PARSE_EVENT_FILTER_PI): + if isinstance(element_or_tree, _ElementTree): + self._include_siblings = root + for elem in list(root.itersiblings(preceding=True))[::-1]: + if self._event_filter & PARSE_EVENT_FILTER_COMMENT and elem.tag is Comment: + self._events.append(('comment', elem)) + elif self._event_filter & PARSE_EVENT_FILTER_PI and elem.tag is PI: + self._events.append(('pi', elem)) + + ns_count = self._start_node(root) + self._node_stack.append( (root, ns_count) ) + else: + self._index = -1 + + def __iter__(self): + return self + + def __next__(self): + cdef xmlNode* c_child + cdef _Element node + cdef _Element next_node + cdef int ns_count = 0 + if self._events: + return self._next_event() + if self._matcher is not None and self._index >= 0: + node = self._node_stack[self._index][0] + self._matcher.cacheTags(node._doc) + + # find next node + while self._index >= 0: + node = self._node_stack[self._index][0] + + if self._skip_state == IWSKIP_SKIP_NEXT: + c_child = NULL + else: + c_child = self._process_non_elements( + node._doc, _findChildForwards(node._c_node, 0)) + self._skip_state = IWSKIP_CANNOT_SKIP + + while c_child is NULL: + # back off through parents + self._index -= 1 + node = self._end_node() + if self._index < 0: + break + c_child = self._process_non_elements( + node._doc, _nextElement(node._c_node)) + + if c_child is not NULL: + next_node = _elementFactory(node._doc, c_child) + if self._event_filter & (PARSE_EVENT_FILTER_START | + PARSE_EVENT_FILTER_START_NS): + ns_count = self._start_node(next_node) + elif self._event_filter & PARSE_EVENT_FILTER_END_NS: + ns_count = _countNsDefs(next_node._c_node) + self._node_stack.append( (next_node, ns_count) ) + self._index += 1 + if self._events: + return self._next_event() + + if self._include_siblings is not None: + node, self._include_siblings = self._include_siblings, None + self._process_non_elements(node._doc, _nextElement(node._c_node)) + if self._events: + return self._next_event() + + raise StopIteration + + @cython.final + cdef xmlNode* _process_non_elements(self, _Document doc, xmlNode* c_node): + while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE: + if c_node.type == tree.XML_COMMENT_NODE: + if self._event_filter & PARSE_EVENT_FILTER_COMMENT: + self._events.append( + ("comment", _elementFactory(doc, c_node))) + c_node = _nextElement(c_node) + elif c_node.type == tree.XML_PI_NODE: + if self._event_filter & PARSE_EVENT_FILTER_PI: + self._events.append( + ("pi", _elementFactory(doc, c_node))) + c_node = _nextElement(c_node) + else: + break + return c_node + + @cython.final + cdef _next_event(self): + if self._skip_state == IWSKIP_NEXT_IS_START: + if self._events[0][0] in ('start', 'start-ns'): + self._skip_state = IWSKIP_CAN_SKIP + return self._pop_event(0) + + def skip_subtree(self): + """Prevent descending into the current subtree. + Instead, the next returned event will be the 'end' event of the current element + (if included), ignoring any children or descendants. + + This has no effect right after an 'end' or 'end-ns' event. + """ + if self._skip_state == IWSKIP_CAN_SKIP: + self._skip_state = IWSKIP_SKIP_NEXT + + @cython.final + cdef int _start_node(self, _Element node) except -1: + cdef int ns_count + if self._event_filter & PARSE_EVENT_FILTER_START_NS: + ns_count = _appendStartNsEvents(node._c_node, self._events) + if self._events: + self._skip_state = IWSKIP_NEXT_IS_START + elif self._event_filter & PARSE_EVENT_FILTER_END_NS: + ns_count = _countNsDefs(node._c_node) + else: + ns_count = 0 + if self._event_filter & PARSE_EVENT_FILTER_START: + if self._matcher is None or self._matcher.matches(node._c_node): + self._events.append( ("start", node) ) + self._skip_state = IWSKIP_NEXT_IS_START + return ns_count + + @cython.final + cdef _Element _end_node(self): + cdef _Element node + cdef int i, ns_count + node, ns_count = self._node_stack.pop() + if self._event_filter & PARSE_EVENT_FILTER_END: + if self._matcher is None or self._matcher.matches(node._c_node): + self._events.append( ("end", node) ) + if self._event_filter & PARSE_EVENT_FILTER_END_NS and ns_count: + event = ("end-ns", None) + for i in range(ns_count): + self._events.append(event) + return node + + +cdef int _countNsDefs(xmlNode* c_node) noexcept: + cdef xmlNs* c_ns + cdef int count + count = 0 + c_ns = c_node.nsDef + while c_ns is not NULL: + count += (c_ns.href is not NULL) + c_ns = c_ns.next + return count + + +cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1: + cdef xmlNs* c_ns + cdef int count + count = 0 + c_ns = c_node.nsDef + while c_ns is not NULL: + if c_ns.href: + ns_tuple = (funicodeOrEmpty(c_ns.prefix), + funicode(c_ns.href)) + event_list.append( ("start-ns", ns_tuple) ) + count += 1 + c_ns = c_ns.next + return count diff --git a/.venv/lib/python3.12/site-packages/lxml/lxml.etree.h b/.venv/lib/python3.12/site-packages/lxml/lxml.etree.h new file mode 100644 index 00000000..c2a0f5ab --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/lxml.etree.h @@ -0,0 +1,248 @@ +/* Generated by Cython 3.0.11 */ + +#ifndef __PYX_HAVE__lxml__etree +#define __PYX_HAVE__lxml__etree + +#include "Python.h" +struct LxmlDocument; +struct LxmlElement; +struct LxmlElementTree; +struct LxmlElementTagMatcher; +struct LxmlElementIterator; +struct LxmlElementBase; +struct LxmlElementClassLookup; +struct LxmlFallbackElementClassLookup; + +/* "lxml/etree.pyx":355 + * + * # type of a function that steps from node to node + * ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<< + * + * + */ +typedef xmlNode *(*_node_to_node_function)(xmlNode *); + +/* "lxml/etree.pyx":371 + * @cython.final + * @cython.freelist(8) + * cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<< + * """Internal base class to reference a libxml document. + * + */ +struct LxmlDocument { + PyObject_HEAD + struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab; + int _ns_counter; + PyObject *_prefix_tail; + xmlDoc *_c_doc; + struct __pyx_obj_4lxml_5etree__BaseParser *_parser; +}; + +/* "lxml/etree.pyx":720 + * + * @cython.no_gc_clear + * cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<< + * """Element class. + * + */ +struct LxmlElement { + PyObject_HEAD + struct LxmlDocument *_doc; + xmlNode *_c_node; + PyObject *_tag; +}; + +/* "lxml/etree.pyx":1895 + * + * + * cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<< + * object LxmlElementTree ]: + * cdef _Document _doc + */ +struct LxmlElementTree { + PyObject_HEAD + struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab; + struct LxmlDocument *_doc; + struct LxmlElement *_context_node; +}; + +/* "lxml/etree.pyx":2669 + * + * + * cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<< + * type LxmlElementTagMatcherType ]: + * """ + */ +struct LxmlElementTagMatcher { + PyObject_HEAD + struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab; + PyObject *_pystrings; + int _node_type; + char *_href; + char *_name; +}; + +/* "lxml/etree.pyx":2700 + * self._name = NULL + * + * cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<< + * object LxmlElementIterator, type LxmlElementIteratorType ]: + * """ + */ +struct LxmlElementIterator { + struct LxmlElementTagMatcher __pyx_base; + struct LxmlElement *_node; + _node_to_node_function _next_element; +}; + +/* "src/lxml/classlookup.pxi":6 + * # Custom Element classes + * + * cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<< + * object LxmlElementBase ]: + * """ElementBase(*children, attrib=None, nsmap=None, **_extra) + */ +struct LxmlElementBase { + struct LxmlElement __pyx_base; +}; + +/* "src/lxml/classlookup.pxi":210 + * # Element class lookup + * + * ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<< + * + * # class to store element class lookup functions + */ +typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *); + +/* "src/lxml/classlookup.pxi":213 + * + * # class to store element class lookup functions + * cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<< + * object LxmlElementClassLookup ]: + * """ElementClassLookup(self) + */ +struct LxmlElementClassLookup { + PyObject_HEAD + _element_class_lookup_function _lookup_function; +}; + +/* "src/lxml/classlookup.pxi":221 + * + * + * cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<< + * [ type LxmlFallbackElementClassLookupType, + * object LxmlFallbackElementClassLookup ]: + */ +struct LxmlFallbackElementClassLookup { + struct LxmlElementClassLookup __pyx_base; + struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab; + struct LxmlElementClassLookup *fallback; + _element_class_lookup_function _fallback_function; +}; + +#ifndef __PYX_HAVE_API__lxml__etree + +#ifdef CYTHON_EXTERN_C + #undef __PYX_EXTERN_C + #define __PYX_EXTERN_C CYTHON_EXTERN_C +#elif defined(__PYX_EXTERN_C) + #ifdef _MSC_VER + #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.") + #else + #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead. + #endif +#else + #ifdef __cplusplus + #define __PYX_EXTERN_C extern "C" + #else + #define __PYX_EXTERN_C extern + #endif +#endif + +#ifndef DL_IMPORT + #define DL_IMPORT(_T) _T +#endif + +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType; +__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType; + +__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *); +__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *); +__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *); +__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int); +__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *); +__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *); +__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *); +__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *); +__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *); +__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *); +__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *); +__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *); +__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *); +__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *); +__PYX_EXTERN_C int hasText(xmlNode *); +__PYX_EXTERN_C int hasTail(xmlNode *); +__PYX_EXTERN_C PyObject *textOf(xmlNode *); +__PYX_EXTERN_C PyObject *tailOf(xmlNode *); +__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *); +__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *); +__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *); +__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *); +__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *); +__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int); +__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int); +__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *); +__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *); +__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *); +__PYX_EXTERN_C int hasChild(xmlNode *); +__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t); +__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t); +__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t); +__PYX_EXTERN_C xmlNode *nextElement(xmlNode *); +__PYX_EXTERN_C xmlNode *previousElement(xmlNode *); +__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *); +__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *); +__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *); +__PYX_EXTERN_C PyObject *utf8(PyObject *); +__PYX_EXTERN_C PyObject *getNsTag(PyObject *); +__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *); +__PYX_EXTERN_C PyObject *namespacedName(xmlNode *); +__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *); +__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *); +__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *); +__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *); + +#endif /* !__PYX_HAVE_API__lxml__etree */ + +/* WARNING: the interface of the module init function changed in CPython 3.5. */ +/* It now returns a PyModuleDef instance instead of a PyModule instance. */ + +#if PY_MAJOR_VERSION < 3 +PyMODINIT_FUNC initetree(void); +#else +/* WARNING: Use PyImport_AppendInittab("etree", PyInit_etree) instead of calling PyInit_etree directly from Python 3.5 */ +PyMODINIT_FUNC PyInit_etree(void); + +#if PY_VERSION_HEX >= 0x03050000 && (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201402L)) +#if defined(__cplusplus) && __cplusplus >= 201402L +[[deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")]] inline +#elif defined(__GNUC__) || defined(__clang__) +__attribute__ ((__deprecated__("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly."), __unused__)) __inline__ +#elif defined(_MSC_VER) +__declspec(deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")) __inline +#endif +static PyObject* __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyObject* res) { + return res; +} +#define PyInit_etree() __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyInit_etree()) +#endif +#endif + +#endif /* !__PYX_HAVE__lxml__etree */ diff --git a/.venv/lib/python3.12/site-packages/lxml/lxml.etree_api.h b/.venv/lib/python3.12/site-packages/lxml/lxml.etree_api.h new file mode 100644 index 00000000..ff0ca50c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/lxml.etree_api.h @@ -0,0 +1,195 @@ +/* Generated by Cython 3.0.11 */ + +#ifndef __PYX_HAVE_API__lxml__etree +#define __PYX_HAVE_API__lxml__etree +#ifdef __MINGW64__ +#define MS_WIN64 +#endif +#include "Python.h" +#include "lxml.etree.h" + +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument)(struct LxmlDocument *, xmlNode *) = 0; +#define deepcopyNodeToDocument __pyx_api_f_4lxml_5etree_deepcopyNodeToDocument +static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_elementTreeFactory)(struct LxmlElement *) = 0; +#define elementTreeFactory __pyx_api_f_4lxml_5etree_elementTreeFactory +static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_newElementTree)(struct LxmlElement *, PyObject *) = 0; +#define newElementTree __pyx_api_f_4lxml_5etree_newElementTree +static struct LxmlElementTree *(*__pyx_api_f_4lxml_5etree_adoptExternalDocument)(xmlDoc *, PyObject *, int) = 0; +#define adoptExternalDocument __pyx_api_f_4lxml_5etree_adoptExternalDocument +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_elementFactory)(struct LxmlDocument *, xmlNode *) = 0; +#define elementFactory __pyx_api_f_4lxml_5etree_elementFactory +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeElement)(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0; +#define makeElement __pyx_api_f_4lxml_5etree_makeElement +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_makeSubElement)(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *) = 0; +#define makeSubElement __pyx_api_f_4lxml_5etree_makeSubElement +static void (*__pyx_api_f_4lxml_5etree_setElementClassLookupFunction)(_element_class_lookup_function, PyObject *) = 0; +#define setElementClassLookupFunction __pyx_api_f_4lxml_5etree_setElementClassLookupFunction +static PyObject *(*__pyx_api_f_4lxml_5etree_lookupDefaultElementClass)(PyObject *, PyObject *, xmlNode *) = 0; +#define lookupDefaultElementClass __pyx_api_f_4lxml_5etree_lookupDefaultElementClass +static PyObject *(*__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass)(PyObject *, PyObject *, xmlNode *) = 0; +#define lookupNamespaceElementClass __pyx_api_f_4lxml_5etree_lookupNamespaceElementClass +static PyObject *(*__pyx_api_f_4lxml_5etree_callLookupFallback)(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *) = 0; +#define callLookupFallback __pyx_api_f_4lxml_5etree_callLookupFallback +static int (*__pyx_api_f_4lxml_5etree_tagMatches)(xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define tagMatches __pyx_api_f_4lxml_5etree_tagMatches +static struct LxmlDocument *(*__pyx_api_f_4lxml_5etree_documentOrRaise)(PyObject *) = 0; +#define documentOrRaise __pyx_api_f_4lxml_5etree_documentOrRaise +static struct LxmlElement *(*__pyx_api_f_4lxml_5etree_rootNodeOrRaise)(PyObject *) = 0; +#define rootNodeOrRaise __pyx_api_f_4lxml_5etree_rootNodeOrRaise +static int (*__pyx_api_f_4lxml_5etree_hasText)(xmlNode *) = 0; +#define hasText __pyx_api_f_4lxml_5etree_hasText +static int (*__pyx_api_f_4lxml_5etree_hasTail)(xmlNode *) = 0; +#define hasTail __pyx_api_f_4lxml_5etree_hasTail +static PyObject *(*__pyx_api_f_4lxml_5etree_textOf)(xmlNode *) = 0; +#define textOf __pyx_api_f_4lxml_5etree_textOf +static PyObject *(*__pyx_api_f_4lxml_5etree_tailOf)(xmlNode *) = 0; +#define tailOf __pyx_api_f_4lxml_5etree_tailOf +static int (*__pyx_api_f_4lxml_5etree_setNodeText)(xmlNode *, PyObject *) = 0; +#define setNodeText __pyx_api_f_4lxml_5etree_setNodeText +static int (*__pyx_api_f_4lxml_5etree_setTailText)(xmlNode *, PyObject *) = 0; +#define setTailText __pyx_api_f_4lxml_5etree_setTailText +static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValue)(xmlNode *, xmlAttr *) = 0; +#define attributeValue __pyx_api_f_4lxml_5etree_attributeValue +static PyObject *(*__pyx_api_f_4lxml_5etree_attributeValueFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define attributeValueFromNsName __pyx_api_f_4lxml_5etree_attributeValueFromNsName +static PyObject *(*__pyx_api_f_4lxml_5etree_getAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0; +#define getAttributeValue __pyx_api_f_4lxml_5etree_getAttributeValue +static PyObject *(*__pyx_api_f_4lxml_5etree_iterattributes)(struct LxmlElement *, int) = 0; +#define iterattributes __pyx_api_f_4lxml_5etree_iterattributes +static PyObject *(*__pyx_api_f_4lxml_5etree_collectAttributes)(xmlNode *, int) = 0; +#define collectAttributes __pyx_api_f_4lxml_5etree_collectAttributes +static int (*__pyx_api_f_4lxml_5etree_setAttributeValue)(struct LxmlElement *, PyObject *, PyObject *) = 0; +#define setAttributeValue __pyx_api_f_4lxml_5etree_setAttributeValue +static int (*__pyx_api_f_4lxml_5etree_delAttribute)(struct LxmlElement *, PyObject *) = 0; +#define delAttribute __pyx_api_f_4lxml_5etree_delAttribute +static int (*__pyx_api_f_4lxml_5etree_delAttributeFromNsName)(xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define delAttributeFromNsName __pyx_api_f_4lxml_5etree_delAttributeFromNsName +static int (*__pyx_api_f_4lxml_5etree_hasChild)(xmlNode *) = 0; +#define hasChild __pyx_api_f_4lxml_5etree_hasChild +static xmlNode *(*__pyx_api_f_4lxml_5etree_findChild)(xmlNode *, Py_ssize_t) = 0; +#define findChild __pyx_api_f_4lxml_5etree_findChild +static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildForwards)(xmlNode *, Py_ssize_t) = 0; +#define findChildForwards __pyx_api_f_4lxml_5etree_findChildForwards +static xmlNode *(*__pyx_api_f_4lxml_5etree_findChildBackwards)(xmlNode *, Py_ssize_t) = 0; +#define findChildBackwards __pyx_api_f_4lxml_5etree_findChildBackwards +static xmlNode *(*__pyx_api_f_4lxml_5etree_nextElement)(xmlNode *) = 0; +#define nextElement __pyx_api_f_4lxml_5etree_nextElement +static xmlNode *(*__pyx_api_f_4lxml_5etree_previousElement)(xmlNode *) = 0; +#define previousElement __pyx_api_f_4lxml_5etree_previousElement +static void (*__pyx_api_f_4lxml_5etree_appendChild)(struct LxmlElement *, struct LxmlElement *) = 0; +#define appendChild __pyx_api_f_4lxml_5etree_appendChild +static int (*__pyx_api_f_4lxml_5etree_appendChildToElement)(struct LxmlElement *, struct LxmlElement *) = 0; +#define appendChildToElement __pyx_api_f_4lxml_5etree_appendChildToElement +static PyObject *(*__pyx_api_f_4lxml_5etree_pyunicode)(const xmlChar *) = 0; +#define pyunicode __pyx_api_f_4lxml_5etree_pyunicode +static PyObject *(*__pyx_api_f_4lxml_5etree_utf8)(PyObject *) = 0; +#define utf8 __pyx_api_f_4lxml_5etree_utf8 +static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTag)(PyObject *) = 0; +#define getNsTag __pyx_api_f_4lxml_5etree_getNsTag +static PyObject *(*__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs)(PyObject *) = 0; +#define getNsTagWithEmptyNs __pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs +static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedName)(xmlNode *) = 0; +#define namespacedName __pyx_api_f_4lxml_5etree_namespacedName +static PyObject *(*__pyx_api_f_4lxml_5etree_namespacedNameFromNsName)(const xmlChar *, const xmlChar *) = 0; +#define namespacedNameFromNsName __pyx_api_f_4lxml_5etree_namespacedNameFromNsName +static void (*__pyx_api_f_4lxml_5etree_iteratorStoreNext)(struct LxmlElementIterator *, struct LxmlElement *) = 0; +#define iteratorStoreNext __pyx_api_f_4lxml_5etree_iteratorStoreNext +static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatcher *, PyObject *) = 0; +#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch +static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0; +#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix +#ifndef __PYX_HAVE_RT_ImportFunction_3_0_11 +#define __PYX_HAVE_RT_ImportFunction_3_0_11 +static int __Pyx_ImportFunction_3_0_11(PyObject *module, const char *funcname, void (**f)(void), const char *sig) { + PyObject *d = 0; + PyObject *cobj = 0; + union { + void (*fp)(void); + void *p; + } tmp; + d = PyObject_GetAttrString(module, (char *)"__pyx_capi__"); + if (!d) + goto bad; + cobj = PyDict_GetItemString(d, funcname); + if (!cobj) { + PyErr_Format(PyExc_ImportError, + "%.200s does not export expected C function %.200s", + PyModule_GetName(module), funcname); + goto bad; + } + if (!PyCapsule_IsValid(cobj, sig)) { + PyErr_Format(PyExc_TypeError, + "C function %.200s.%.200s has wrong signature (expected %.500s, got %.500s)", + PyModule_GetName(module), funcname, sig, PyCapsule_GetName(cobj)); + goto bad; + } + tmp.p = PyCapsule_GetPointer(cobj, sig); + *f = tmp.fp; + if (!(*f)) + goto bad; + Py_DECREF(d); + return 0; +bad: + Py_XDECREF(d); + return -1; +} +#endif + + +static int import_lxml__etree(void) { + PyObject *module = 0; + module = PyImport_ImportModule("lxml.etree"); + if (!module) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad; + if (__Pyx_ImportFunction_3_0_11(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad; + Py_DECREF(module); module = 0; + return 0; + bad: + Py_XDECREF(module); + return -1; +} + +#endif /* !__PYX_HAVE_API__lxml__etree */ diff --git a/.venv/lib/python3.12/site-packages/lxml/nsclasses.pxi b/.venv/lib/python3.12/site-packages/lxml/nsclasses.pxi new file mode 100644 index 00000000..a3c86f0e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/nsclasses.pxi @@ -0,0 +1,281 @@ +# module-level API for namespace implementations + +cdef class LxmlRegistryError(LxmlError): + """Base class of lxml registry errors. + """ + +cdef class NamespaceRegistryError(LxmlRegistryError): + """Error registering a namespace extension. + """ + + +@cython.internal +cdef class _NamespaceRegistry: + "Dictionary-like namespace registry" + cdef object _ns_uri + cdef bytes _ns_uri_utf + cdef dict _entries + cdef char* _c_ns_uri_utf + def __cinit__(self, ns_uri): + self._ns_uri = ns_uri + if ns_uri is None: + self._ns_uri_utf = None + self._c_ns_uri_utf = NULL + else: + self._ns_uri_utf = _utf8(ns_uri) + self._c_ns_uri_utf = _cstr(self._ns_uri_utf) + self._entries = {} + + def update(self, class_dict_iterable): + """update(self, class_dict_iterable) + + Forgivingly update the registry. + + ``class_dict_iterable`` may be a dict or some other iterable + that yields (name, value) pairs. + + If a value does not match the required type for this registry, + or if the name starts with '_', it will be silently discarded. + This allows registrations at the module or class level using + vars(), globals() etc.""" + if hasattr(class_dict_iterable, 'items'): + class_dict_iterable = class_dict_iterable.items() + for name, item in class_dict_iterable: + if (name is None or name[:1] != '_') and callable(item): + self[name] = item + + def __getitem__(self, name): + if name is not None: + name = _utf8(name) + return self._get(name) + + def __delitem__(self, name): + if name is not None: + name = _utf8(name) + del self._entries[name] + + cdef object _get(self, object name): + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItem(self._entries, name) + if dict_result is NULL: + raise KeyError, "Name not registered." + return <object>dict_result + + cdef object _getForString(self, char* name): + cdef python.PyObject* dict_result + dict_result = python.PyDict_GetItem(self._entries, name) + if dict_result is NULL: + raise KeyError, "Name not registered." + return <object>dict_result + + def __iter__(self): + return iter(self._entries) + + def items(self): + return list(self._entries.items()) + + def iteritems(self): + return iter(self._entries.items()) + + def clear(self): + self._entries.clear() + + def __call__(self, obj): + # Usage as decorator: + # ns = lookup.get_namespace("...") + # @ns('abc') + # class element(ElementBase): pass + # + # @ns + # class elementname(ElementBase): pass + + if obj is None or python._isString(obj): + # @ns(None) or @ns('tag') + return partial(self.__deco, obj) + # plain @ns decorator + self[obj.__name__] = obj + return obj + + def __deco(self, name, obj): + self[name] = obj + return obj + + +@cython.final +@cython.internal +cdef class _ClassNamespaceRegistry(_NamespaceRegistry): + "Dictionary-like registry for namespace implementation classes" + def __setitem__(self, name, item): + if not isinstance(item, type) or not issubclass(item, ElementBase): + raise NamespaceRegistryError, \ + "Registered element classes must be subtypes of ElementBase" + if name is not None: + name = _utf8(name) + self._entries[name] = item + + def __repr__(self): + return "Namespace(%r)" % self._ns_uri + + +cdef class ElementNamespaceClassLookup(FallbackElementClassLookup): + """ElementNamespaceClassLookup(self, fallback=None) + + Element class lookup scheme that searches the Element class in the + Namespace registry. + + Usage: + + >>> lookup = ElementNamespaceClassLookup() + >>> ns_elements = lookup.get_namespace("http://schema.org/Movie") + + >>> @ns_elements + ... class movie(ElementBase): + ... "Element implementation for 'movie' tag (using class name) in schema namespace." + + >>> @ns_elements("movie") + ... class MovieElement(ElementBase): + ... "Element implementation for 'movie' tag (explicit tag name) in schema namespace." + """ + cdef dict _namespace_registries + def __cinit__(self): + self._namespace_registries = {} + + def __init__(self, ElementClassLookup fallback=None): + FallbackElementClassLookup.__init__(self, fallback) + self._lookup_function = _find_nselement_class + + def get_namespace(self, ns_uri): + """get_namespace(self, ns_uri) + + Retrieve the namespace object associated with the given URI. + Pass None for the empty namespace. + + Creates a new namespace object if it does not yet exist.""" + if ns_uri: + ns_utf = _utf8(ns_uri) + else: + ns_utf = None + try: + return self._namespace_registries[ns_utf] + except KeyError: + registry = self._namespace_registries[ns_utf] = \ + _ClassNamespaceRegistry(ns_uri) + return registry + +cdef object _find_nselement_class(state, _Document doc, xmlNode* c_node): + cdef python.PyObject* dict_result + cdef ElementNamespaceClassLookup lookup + cdef _NamespaceRegistry registry + if state is None: + return _lookupDefaultElementClass(None, doc, c_node) + + lookup = <ElementNamespaceClassLookup>state + if c_node.type != tree.XML_ELEMENT_NODE: + return _callLookupFallback(lookup, doc, c_node) + + c_namespace_utf = _getNs(c_node) + if c_namespace_utf is not NULL: + dict_result = python.PyDict_GetItem( + lookup._namespace_registries, <unsigned char*>c_namespace_utf) + else: + dict_result = python.PyDict_GetItem( + lookup._namespace_registries, None) + if dict_result is not NULL: + registry = <_NamespaceRegistry>dict_result + classes = registry._entries + + if c_node.name is not NULL: + dict_result = python.PyDict_GetItem( + classes, <unsigned char*>c_node.name) + else: + dict_result = NULL + + if dict_result is NULL: + dict_result = python.PyDict_GetItem(classes, None) + + if dict_result is not NULL: + return <object>dict_result + return _callLookupFallback(lookup, doc, c_node) + + +################################################################################ +# XPath extension functions + +cdef dict __FUNCTION_NAMESPACE_REGISTRIES +__FUNCTION_NAMESPACE_REGISTRIES = {} + +def FunctionNamespace(ns_uri): + """FunctionNamespace(ns_uri) + + Retrieve the function namespace object associated with the given + URI. + + Creates a new one if it does not yet exist. A function namespace + can only be used to register extension functions. + + Usage: + + >>> ns_functions = FunctionNamespace("http://schema.org/Movie") + + >>> @ns_functions # uses function name + ... def add2(x): + ... return x + 2 + + >>> @ns_functions("add3") # uses explicit name + ... def add_three(x): + ... return x + 3 + """ + ns_utf = _utf8(ns_uri) if ns_uri else None + try: + return __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] + except KeyError: + registry = __FUNCTION_NAMESPACE_REGISTRIES[ns_utf] = \ + _XPathFunctionNamespaceRegistry(ns_uri) + return registry + +@cython.internal +cdef class _FunctionNamespaceRegistry(_NamespaceRegistry): + def __setitem__(self, name, item): + if not callable(item): + raise NamespaceRegistryError, \ + "Registered functions must be callable." + if not name: + raise ValueError, \ + "extensions must have non empty names" + self._entries[_utf8(name)] = item + + def __repr__(self): + return "FunctionNamespace(%r)" % self._ns_uri + +@cython.final +@cython.internal +cdef class _XPathFunctionNamespaceRegistry(_FunctionNamespaceRegistry): + cdef object _prefix + cdef bytes _prefix_utf + + property prefix: + "Namespace prefix for extension functions." + def __del__(self): + self._prefix = None # no prefix configured + self._prefix_utf = None + def __get__(self): + if self._prefix is None: + return '' + else: + return self._prefix + def __set__(self, prefix): + if prefix == '': + prefix = None # empty prefix + self._prefix_utf = _utf8(prefix) if prefix is not None else None + self._prefix = prefix + +cdef list _find_all_extension_prefixes(): + "Internal lookup function to find all function prefixes for XSLT/XPath." + cdef _XPathFunctionNamespaceRegistry registry + cdef list ns_prefixes = [] + for registry in __FUNCTION_NAMESPACE_REGISTRIES.itervalues(): + if registry._prefix_utf is not None: + if registry._ns_uri_utf is not None: + ns_prefixes.append( + (registry._prefix_utf, registry._ns_uri_utf)) + return ns_prefixes diff --git a/.venv/lib/python3.12/site-packages/lxml/objectify.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/lxml/objectify.cpython-312-x86_64-linux-gnu.so Binary files differnew file mode 100755 index 00000000..885189c6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/objectify.cpython-312-x86_64-linux-gnu.so diff --git a/.venv/lib/python3.12/site-packages/lxml/objectify.pyx b/.venv/lib/python3.12/site-packages/lxml/objectify.pyx new file mode 100644 index 00000000..0ff92226 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/objectify.pyx @@ -0,0 +1,2145 @@ +# cython: binding=True +# cython: auto_pickle=False +# cython: language_level=3 + +""" +The ``lxml.objectify`` module implements a Python object API for XML. +It is based on `lxml.etree`. +""" + +cimport cython + +from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup +from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode +from lxml.includes.tree cimport const_xmlChar, _xcstr +from lxml cimport python +from lxml.includes cimport tree + +cimport lxml.includes.etreepublic as cetree +cimport libc.string as cstring_h # not to be confused with stdlib 'string' +from libc.string cimport const_char + +__all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker', + 'FloatElement', 'IntElement', 'NoneElement', + 'NumberElement', 'ObjectPath', 'ObjectifiedDataElement', + 'ObjectifiedElement', 'ObjectifyElementClassLookup', + 'PYTYPE_ATTRIBUTE', 'PyType', 'StringElement', 'SubElement', + 'XML', 'annotate', 'deannotate', 'dump', 'enable_recursive_str', + 'fromstring', 'getRegisteredTypes', 'makeparser', 'parse', + 'pyannotate', 'pytypename', 'set_default_parser', + 'set_pytype_attribute_tag', 'xsiannotate'] + +cdef object etree +from lxml import etree +# initialize C-API of lxml.etree +import_lxml__etree() + +__version__ = etree.__version__ + +cdef object _float_is_inf, _float_is_nan +from math import isinf as _float_is_inf, isnan as _float_is_nan + +cdef object re +import re + +cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError) +cdef object is_special_method = re.compile('__.*__$').match + + +cdef object _typename(object t): + cdef const_char* c_name + c_name = python._fqtypename(t) + s = cstring_h.strrchr(c_name, c'.') + if s is not NULL: + c_name = s + 1 + return pyunicode(<const_xmlChar*>c_name) + + +# namespace/name for "pytype" hint attribute +cdef object PYTYPE_NAMESPACE +cdef bytes PYTYPE_NAMESPACE_UTF8 +cdef const_xmlChar* _PYTYPE_NAMESPACE + +cdef object PYTYPE_ATTRIBUTE_NAME +cdef bytes PYTYPE_ATTRIBUTE_NAME_UTF8 +cdef const_xmlChar* _PYTYPE_ATTRIBUTE_NAME + +PYTYPE_ATTRIBUTE = None + +cdef unicode TREE_PYTYPE_NAME = "TREE" + +cdef tuple _unicodeAndUtf8(s): + return s, python.PyUnicode_AsUTF8String(s) + +def set_pytype_attribute_tag(attribute_tag=None): + """set_pytype_attribute_tag(attribute_tag=None) + Change name and namespace of the XML attribute that holds Python type + information. + + Do not use this unless you know what you are doing. + + Reset by calling without argument. + + Default: "{http://codespeak.net/lxml/objectify/pytype}pytype" + """ + global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME + global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 + global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 + if attribute_tag is None: + PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8 = \ + _unicodeAndUtf8("http://codespeak.net/lxml/objectify/pytype") + PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ + _unicodeAndUtf8("pytype") + else: + PYTYPE_NAMESPACE_UTF8, PYTYPE_ATTRIBUTE_NAME_UTF8 = \ + cetree.getNsTag(attribute_tag) + PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8') + PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8') + + _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8 + _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8 + PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName( + _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + +set_pytype_attribute_tag() + + +# namespaces for XML Schema +cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 +XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8 = \ + _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema") +cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8) + +cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 +XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8 = \ + _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema-instance") +cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8) + +cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS +cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS + + +################################################################################ +# Element class for the main API + +cdef class ObjectifiedElement(ElementBase): + """Main XML Element class. + + Element children are accessed as object attributes. Multiple children + with the same name are available through a list index. Example:: + + >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>") + >>> second_c2 = root.c1.c2[1] + >>> print(second_c2.text) + 1 + + Note that you cannot (and must not) instantiate this class or its + subclasses. + """ + def __iter__(self): + """Iterate over self and all siblings with the same tag. + """ + parent = self.getparent() + if parent is None: + return iter([self]) + return etree.ElementChildIterator(parent, tag=self.tag) + + def __str__(self): + if __RECURSIVE_STR: + return _dump(self, 0) + else: + return textOf(self._c_node) or '' + + # pickle support for objectified Element + def __reduce__(self): + return fromstring, (etree.tostring(self),) + + @property + def text(self): + return textOf(self._c_node) + + @property + def __dict__(self): + """A fake implementation for __dict__ to support dir() etc. + + Note that this only considers the first child with a given name. + """ + cdef _Element child + cdef dict children + c_ns = tree._getNs(self._c_node) + tag = "{%s}*" % pyunicode(c_ns) if c_ns is not NULL else None + children = {} + for child in etree.ElementChildIterator(self, tag=tag): + if c_ns is NULL and tree._getNs(child._c_node) is not NULL: + continue + name = pyunicode(child._c_node.name) + if name not in children: + children[name] = child + return children + + def __len__(self): + """Count self and siblings with the same tag. + """ + return _countSiblings(self._c_node) + + def countchildren(self): + """countchildren(self) + + Return the number of children of this element, regardless of their + name. + """ + # copied from etree + cdef Py_ssize_t c + cdef tree.xmlNode* c_node + c = 0 + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + c += 1 + c_node = c_node.next + return c + + def getchildren(self): + """getchildren(self) + + Returns a sequence of all direct children. The elements are + returned in document order. + """ + cdef tree.xmlNode* c_node + result = [] + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + result.append(cetree.elementFactory(self._doc, c_node)) + c_node = c_node.next + return result + + def __getattr__(self, tag): + """Return the (first) child with the given tag name. If no namespace + is provided, the child will be looked up in the same one as self. + """ + return _lookupChildOrRaise(self, tag) + + def __setattr__(self, tag, value): + """Set the value of the (first) child with the given tag name. If no + namespace is provided, the child will be looked up in the same one as + self. + """ + cdef _Element element + # properties are looked up /after/ __setattr__, so we must emulate them + if tag == 'text' or tag == 'pyval': + # read-only ! + raise TypeError, f"attribute '{tag}' of '{_typename(self)}' objects is not writable" + elif tag == 'tail': + cetree.setTailText(self._c_node, value) + return + elif tag == 'tag': + ElementBase.tag.__set__(self, value) + return + elif tag == 'base': + ElementBase.base.__set__(self, value) + return + tag = _buildChildTag(self, tag) + element = _lookupChild(self, tag) + if element is None: + _appendValue(self, tag, value) + else: + _replaceElement(element, value) + + def __delattr__(self, tag): + child = _lookupChildOrRaise(self, tag) + self.remove(child) + + def addattr(self, tag, value): + """addattr(self, tag, value) + + Add a child value to the element. + + As opposed to append(), it sets a data value, not an element. + """ + _appendValue(self, _buildChildTag(self, tag), value) + + def __getitem__(self, key): + """Return a sibling, counting from the first child of the parent. The + method behaves like both a dict and a sequence. + + * If argument is an integer, returns the sibling at that position. + + * If argument is a string, does the same as getattr(). This can be + used to provide namespaces for element lookup, or to look up + children with special names (``text`` etc.). + + * If argument is a slice object, returns the matching slice. + """ + cdef tree.xmlNode* c_self_node + cdef tree.xmlNode* c_parent + cdef tree.xmlNode* c_node + cdef Py_ssize_t c_index + if python._isString(key): + return _lookupChildOrRaise(self, key) + elif isinstance(key, slice): + return list(self)[key] + # normal item access + c_index = key # raises TypeError if necessary + c_self_node = self._c_node + c_parent = c_self_node.parent + if c_parent is NULL: + if c_index == 0 or c_index == -1: + return self + raise IndexError, unicode(key) + if c_index < 0: + c_node = c_parent.last + else: + c_node = c_parent.children + c_node = _findFollowingSibling( + c_node, tree._getNs(c_self_node), c_self_node.name, c_index) + if c_node is NULL: + raise IndexError, unicode(key) + return elementFactory(self._doc, c_node) + + def __setitem__(self, key, value): + """Set the value of a sibling, counting from the first child of the + parent. Implements key assignment, item assignment and slice + assignment. + + * If argument is an integer, sets the sibling at that position. + + * If argument is a string, does the same as setattr(). This is used + to provide namespaces for element lookup. + + * If argument is a sequence (list, tuple, etc.), assign the contained + items to the siblings. + """ + cdef _Element element + cdef tree.xmlNode* c_node + if python._isString(key): + key = _buildChildTag(self, key) + element = _lookupChild(self, key) + if element is None: + _appendValue(self, key, value) + else: + _replaceElement(element, value) + return + + if self._c_node.parent is NULL: + # the 'root[i] = ...' case + raise TypeError, "assignment to root element is invalid" + + if isinstance(key, slice): + # slice assignment + _setSlice(key, self, value) + else: + # normal index assignment + if key < 0: + c_node = self._c_node.parent.last + else: + c_node = self._c_node.parent.children + c_node = _findFollowingSibling( + c_node, tree._getNs(self._c_node), self._c_node.name, key) + if c_node is NULL: + raise IndexError, unicode(key) + element = elementFactory(self._doc, c_node) + _replaceElement(element, value) + + def __delitem__(self, key): + parent = self.getparent() + if parent is None: + raise TypeError, "deleting items not supported by root element" + if isinstance(key, slice): + # slice deletion + del_items = list(self)[key] + remove = parent.remove + for el in del_items: + remove(el) + else: + # normal index deletion + sibling = self.__getitem__(key) + parent.remove(sibling) + + def descendantpaths(self, prefix=None): + """descendantpaths(self, prefix=None) + + Returns a list of object path expressions for all descendants. + """ + if prefix is not None and not python._isString(prefix): + prefix = '.'.join(prefix) + return _build_descendant_paths(self._c_node, prefix) + + +cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name): + if c_node.name != c_name: + return 0 + if c_href == NULL: + return 1 + c_node_href = tree._getNs(c_node) + if c_node_href == NULL: + return c_href[0] == c'\0' + return tree.xmlStrcmp(c_node_href, c_href) == 0 + + +cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node): + cdef tree.xmlNode* c_node + cdef Py_ssize_t count + c_tag = c_start_node.name + c_href = tree._getNs(c_start_node) + count = 1 + c_node = c_start_node.next + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + _tagMatches(c_node, c_href, c_tag): + count += 1 + c_node = c_node.next + c_node = c_start_node.prev + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + _tagMatches(c_node, c_href, c_tag): + count += 1 + c_node = c_node.prev + return count + +cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node, + const_xmlChar* href, const_xmlChar* name, + Py_ssize_t index): + cdef tree.xmlNode* (*next)(tree.xmlNode*) + if index >= 0: + next = cetree.nextElement + else: + index = -1 - index + next = cetree.previousElement + while c_node is not NULL: + if c_node.type == tree.XML_ELEMENT_NODE and \ + _tagMatches(c_node, href, name): + index = index - 1 + if index < 0: + return c_node + c_node = next(c_node) + return NULL + +cdef object _lookupChild(_Element parent, tag): + cdef tree.xmlNode* c_result + cdef tree.xmlNode* c_node + c_node = parent._c_node + ns, tag = cetree.getNsTagWithEmptyNs(tag) + c_tag = tree.xmlDictExists( + c_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag)) + if c_tag is NULL: + return None # not in the hash map => not in the tree + if ns is None: + # either inherit ns from parent or use empty (i.e. no) namespace + c_href = tree._getNs(c_node) or <const_xmlChar*>'' + else: + c_href = _xcstr(ns) + c_result = _findFollowingSibling(c_node.children, c_href, c_tag, 0) + if c_result is NULL: + return None + return elementFactory(parent._doc, c_result) + +cdef object _lookupChildOrRaise(_Element parent, tag): + element = _lookupChild(parent, tag) + if element is None: + raise AttributeError, "no such child: " + _buildChildTag(parent, tag) + return element + +cdef object _buildChildTag(_Element parent, tag): + ns, tag = cetree.getNsTag(tag) + c_tag = _xcstr(tag) + c_href = tree._getNs(parent._c_node) if ns is None else _xcstr(ns) + return cetree.namespacedNameFromNsName(c_href, c_tag) + +cdef _replaceElement(_Element element, value): + cdef _Element new_element + if isinstance(value, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + element._doc, (<_Element>value)._c_node) + new_element.tag = element.tag + elif isinstance(value, (list, tuple)): + element[:] = value + return + else: + new_element = element.makeelement(element.tag) + _setElementValue(new_element, value) + element.getparent().replace(element, new_element) + +cdef _appendValue(_Element parent, tag, value): + cdef _Element new_element + if isinstance(value, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + parent._doc, (<_Element>value)._c_node) + new_element.tag = tag + cetree.appendChildToElement(parent, new_element) + elif isinstance(value, (list, tuple)): + for item in value: + _appendValue(parent, tag, item) + else: + new_element = cetree.makeElement( + tag, parent._doc, None, None, None, None, None) + _setElementValue(new_element, value) + cetree.appendChildToElement(parent, new_element) + +cdef _setElementValue(_Element element, value): + if value is None: + cetree.setAttributeValue( + element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") + elif isinstance(value, _Element): + _replaceElement(element, value) + return + else: + cetree.delAttributeFromNsName( + element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") + if python._isString(value): + pytype_name = "str" + py_type = <PyType>_PYTYPE_DICT.get(pytype_name) + else: + pytype_name = _typename(value) + py_type = <PyType>_PYTYPE_DICT.get(pytype_name) + if py_type is not None: + value = py_type.stringify(value) + else: + value = unicode(value) + if py_type is not None: + cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) + else: + cetree.delAttributeFromNsName( + element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + cetree.setNodeText(element._c_node, value) + +cdef _setSlice(sliceobject, _Element target, items): + cdef _Element parent + cdef tree.xmlNode* c_node + cdef Py_ssize_t c_step, c_start, pos + # collect existing slice + if (<slice>sliceobject).step is None: + c_step = 1 + else: + c_step = (<slice>sliceobject).step + if c_step == 0: + raise ValueError, "Invalid slice" + cdef list del_items = target[sliceobject] + + # collect new values + new_items = [] + tag = target.tag + for item in items: + if isinstance(item, _Element): + # deep copy the new element + new_element = cetree.deepcopyNodeToDocument( + target._doc, (<_Element>item)._c_node) + new_element.tag = tag + else: + new_element = cetree.makeElement( + tag, target._doc, None, None, None, None, None) + _setElementValue(new_element, item) + new_items.append(new_element) + + # sanity check - raise what a list would raise + if c_step != 1 and len(del_items) != len(new_items): + raise ValueError, \ + f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}" + + # replace existing items + pos = 0 + parent = target.getparent() + replace = parent.replace + while pos < len(new_items) and pos < len(del_items): + replace(del_items[pos], new_items[pos]) + pos += 1 + # remove leftover items + if pos < len(del_items): + remove = parent.remove + while pos < len(del_items): + remove(del_items[pos]) + pos += 1 + # append remaining new items + if pos < len(new_items): + # the sanity check above guarantees (step == 1) + if pos > 0: + item = new_items[pos-1] + else: + if (<slice>sliceobject).start > 0: + c_node = parent._c_node.children + else: + c_node = parent._c_node.last + c_node = _findFollowingSibling( + c_node, tree._getNs(target._c_node), target._c_node.name, + (<slice>sliceobject).start - 1) + if c_node is NULL: + while pos < len(new_items): + cetree.appendChildToElement(parent, new_items[pos]) + pos += 1 + return + item = cetree.elementFactory(parent._doc, c_node) + while pos < len(new_items): + add = item.addnext + item = new_items[pos] + add(item) + pos += 1 + +################################################################################ +# Data type support in subclasses + +cdef class ObjectifiedDataElement(ObjectifiedElement): + """This is the base class for all data type Elements. Subclasses should + override the 'pyval' property and possibly the __str__ method. + """ + @property + def pyval(self): + return textOf(self._c_node) + + def __str__(self): + return textOf(self._c_node) or '' + + def __repr__(self): + return textOf(self._c_node) or '' + + def _setText(self, s): + """For use in subclasses only. Don't use unless you know what you are + doing. + """ + cetree.setNodeText(self._c_node, s) + + +cdef class NumberElement(ObjectifiedDataElement): + cdef object _parse_value + + def _setValueParser(self, function): + """Set the function that parses the Python value from a string. + + Do not use this unless you know what you are doing. + """ + self._parse_value = function + + @property + def pyval(self): + return _parseNumber(self) + + def __int__(self): + return int(_parseNumber(self)) + + def __float__(self): + return float(_parseNumber(self)) + + def __complex__(self): + return complex(_parseNumber(self)) + + def __str__(self): + return unicode(_parseNumber(self)) + + def __repr__(self): + return repr(_parseNumber(self)) + + def __oct__(self): + return oct(_parseNumber(self)) + + def __hex__(self): + return hex(_parseNumber(self)) + + def __richcmp__(self, other, int op): + return _richcmpPyvals(self, other, op) + + def __hash__(self): + return hash(_parseNumber(self)) + + def __add__(self, other): + return _numericValueOf(self) + _numericValueOf(other) + + def __radd__(self, other): + return _numericValueOf(other) + _numericValueOf(self) + + def __sub__(self, other): + return _numericValueOf(self) - _numericValueOf(other) + + def __rsub__(self, other): + return _numericValueOf(other) - _numericValueOf(self) + + def __mul__(self, other): + return _numericValueOf(self) * _numericValueOf(other) + + def __rmul__(self, other): + return _numericValueOf(other) * _numericValueOf(self) + + def __div__(self, other): + return _numericValueOf(self) / _numericValueOf(other) + + def __rdiv__(self, other): + return _numericValueOf(other) / _numericValueOf(self) + + def __truediv__(self, other): + return _numericValueOf(self) / _numericValueOf(other) + + def __rtruediv__(self, other): + return _numericValueOf(other) / _numericValueOf(self) + + def __floordiv__(self, other): + return _numericValueOf(self) // _numericValueOf(other) + + def __rfloordiv__(self, other): + return _numericValueOf(other) // _numericValueOf(self) + + def __mod__(self, other): + return _numericValueOf(self) % _numericValueOf(other) + + def __rmod__(self, other): + return _numericValueOf(other) % _numericValueOf(self) + + def __divmod__(self, other): + return divmod(_numericValueOf(self), _numericValueOf(other)) + + def __rdivmod__(self, other): + return divmod(_numericValueOf(other), _numericValueOf(self)) + + def __pow__(self, other, modulo): + if modulo is None: + return _numericValueOf(self) ** _numericValueOf(other) + else: + return pow(_numericValueOf(self), _numericValueOf(other), modulo) + + def __rpow__(self, other, modulo): + if modulo is None: + return _numericValueOf(other) ** _numericValueOf(self) + else: + return pow(_numericValueOf(other), _numericValueOf(self), modulo) + + def __neg__(self): + return - _numericValueOf(self) + + def __pos__(self): + return + _numericValueOf(self) + + def __abs__(self): + return abs( _numericValueOf(self) ) + + def __bool__(self): + return bool(_numericValueOf(self)) + + def __invert__(self): + return ~ _numericValueOf(self) + + def __lshift__(self, other): + return _numericValueOf(self) << _numericValueOf(other) + + def __rlshift__(self, other): + return _numericValueOf(other) << _numericValueOf(self) + + def __rshift__(self, other): + return _numericValueOf(self) >> _numericValueOf(other) + + def __rrshift__(self, other): + return _numericValueOf(other) >> _numericValueOf(self) + + def __and__(self, other): + return _numericValueOf(self) & _numericValueOf(other) + + def __rand__(self, other): + return _numericValueOf(other) & _numericValueOf(self) + + def __or__(self, other): + return _numericValueOf(self) | _numericValueOf(other) + + def __ror__(self, other): + return _numericValueOf(other) | _numericValueOf(self) + + def __xor__(self, other): + return _numericValueOf(self) ^ _numericValueOf(other) + + def __rxor__(self, other): + return _numericValueOf(other) ^ _numericValueOf(self) + + +cdef class IntElement(NumberElement): + def _init(self): + self._parse_value = int + + def __index__(self): + return int(_parseNumber(self)) + + +cdef class FloatElement(NumberElement): + def _init(self): + self._parse_value = float + + +cdef class StringElement(ObjectifiedDataElement): + """String data class. + + Note that this class does *not* support the sequence protocol of strings: + len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported. + Instead, use the .text attribute to get a 'real' string. + """ + @property + def pyval(self): + return textOf(self._c_node) or '' + + def __repr__(self): + return repr(textOf(self._c_node) or '') + + def strlen(self): + text = textOf(self._c_node) + if text is None: + return 0 + else: + return len(text) + + def __bool__(self): + return bool(textOf(self._c_node)) + + def __richcmp__(self, other, int op): + return _richcmpPyvals(self, other, op) + + def __hash__(self): + return hash(textOf(self._c_node) or '') + + def __add__(self, other): + text = _strValueOf(self) + other = _strValueOf(other) + return text + other + + def __radd__(self, other): + text = _strValueOf(self) + other = _strValueOf(other) + return other + text + + def __mul__(self, other): + if isinstance(self, StringElement): + return (textOf((<StringElement>self)._c_node) or '') * _numericValueOf(other) + elif isinstance(other, StringElement): + return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or '') + else: + return NotImplemented + + def __rmul__(self, other): + return _numericValueOf(other) * (textOf((<StringElement>self)._c_node) or '') + + def __mod__(self, other): + return (_strValueOf(self) or '') % other + + def __int__(self): + return int(textOf(self._c_node)) + + def __float__(self): + return float(textOf(self._c_node)) + + def __complex__(self): + return complex(textOf(self._c_node)) + + +cdef class NoneElement(ObjectifiedDataElement): + def __str__(self): + return "None" + + def __repr__(self): + return "None" + + def __bool__(self): + return False + + def __richcmp__(self, other, int op): + if other is None or self is None: + return python.PyObject_RichCompare(None, None, op) + if isinstance(self, NoneElement): + return python.PyObject_RichCompare(None, other, op) + else: + return python.PyObject_RichCompare(self, None, op) + + def __hash__(self): + return hash(None) + + @property + def pyval(self): + return None + + +cdef class BoolElement(IntElement): + """Boolean type base on string values: 'true' or 'false'. + + Note that this inherits from IntElement to mimic the behaviour of + Python's bool type. + """ + def _init(self): + self._parse_value = _parseBool # wraps as Python callable + + def __bool__(self): + return _parseBool(textOf(self._c_node)) + + def __int__(self): + return 0 + _parseBool(textOf(self._c_node)) + + def __float__(self): + return 0.0 + _parseBool(textOf(self._c_node)) + + def __richcmp__(self, other, int op): + return _richcmpPyvals(self, other, op) + + def __hash__(self): + return hash(_parseBool(textOf(self._c_node))) + + def __str__(self): + return unicode(_parseBool(textOf(self._c_node))) + + def __repr__(self): + return repr(_parseBool(textOf(self._c_node))) + + @property + def pyval(self): + return _parseBool(textOf(self._c_node)) + + +cdef _checkBool(s): + cdef int value = -1 + if s is not None: + value = __parseBoolAsInt(s) + if value == -1: + raise ValueError + + +cdef bint _parseBool(s) except -1: + cdef int value + if s is None: + return False + value = __parseBoolAsInt(s) + if value == -1: + raise ValueError, f"Invalid boolean value: '{s}'" + return value + + +cdef inline int __parseBoolAsInt(text) except -2: + if text == 'false': + return 0 + elif text == 'true': + return 1 + elif text == '0': + return 0 + elif text == '1': + return 1 + return -1 + + +cdef object _parseNumber(NumberElement element): + return element._parse_value(textOf(element._c_node)) + + +cdef enum NumberParserState: + NPS_SPACE_PRE = 0 + NPS_SIGN = 1 + NPS_DIGITS = 2 + NPS_POINT_LEAD = 3 + NPS_POINT = 4 + NPS_FRACTION = 5 + NPS_EXP = 6 + NPS_EXP_SIGN = 7 + NPS_DIGITS_EXP = 8 + NPS_SPACE_TAIL = 9 + NPS_INF1 = 20 + NPS_INF2 = 21 + NPS_INF3 = 22 + NPS_NAN1 = 23 + NPS_NAN2 = 24 + NPS_NAN3 = 25 + NPS_ERROR = 99 + + +ctypedef fused bytes_unicode: + bytes + unicode + + +cdef _checkNumber(bytes_unicode s, bint allow_float): + cdef Py_UCS4 c + cdef NumberParserState state = NPS_SPACE_PRE + + for c in s: + if c in '0123456789': + if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP): + pass + elif state in (NPS_SPACE_PRE, NPS_SIGN): + state = NPS_DIGITS + elif state in (NPS_POINT_LEAD, NPS_POINT): + state = NPS_FRACTION + elif state in (NPS_EXP, NPS_EXP_SIGN): + state = NPS_DIGITS_EXP + else: + state = NPS_ERROR + else: + if c == '.': + if state in (NPS_SPACE_PRE, NPS_SIGN): + state = NPS_POINT_LEAD + elif state == NPS_DIGITS: + state = NPS_POINT + else: + state = NPS_ERROR + if not allow_float: + state = NPS_ERROR + elif c in '-+': + if state == NPS_SPACE_PRE: + state = NPS_SIGN + elif state == NPS_EXP: + state = NPS_EXP_SIGN + else: + state = NPS_ERROR + elif c == 'E': + if state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION): + state = NPS_EXP + else: + state = NPS_ERROR + if not allow_float: + state = NPS_ERROR + # Allow INF and NaN. XMLSchema requires case, we don't, like Python. + elif c in 'iI': + state = NPS_INF1 if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN) else NPS_ERROR + elif c in 'fF': + state = NPS_INF3 if state == NPS_INF2 else NPS_ERROR + elif c in 'aA': + state = NPS_NAN2 if state == NPS_NAN1 else NPS_ERROR + elif c in 'nN': + # Python also allows [+-]NaN, so let's accept that. + if state in (NPS_SPACE_PRE, NPS_SIGN): + state = NPS_NAN1 if allow_float else NPS_ERROR + elif state == NPS_NAN2: + state = NPS_NAN3 + elif state == NPS_INF1: + state = NPS_INF2 + else: + state = NPS_ERROR + # Allow spaces around text values. + else: + if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20': + if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL): + pass + elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3): + state = NPS_SPACE_TAIL + else: + state = NPS_ERROR + else: + state = NPS_ERROR + + if state == NPS_ERROR: + break + + if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL): + raise ValueError + + +cdef _checkInt(s): + return _checkNumber(<unicode>s, allow_float=False) + + +cdef _checkFloat(s): + return _checkNumber(<unicode>s, allow_float=True) + + +cdef object _strValueOf(obj): + if python._isString(obj): + return obj + if isinstance(obj, _Element): + return textOf((<_Element>obj)._c_node) or '' + if obj is None: + return '' + return unicode(obj) + + +cdef object _numericValueOf(obj): + if isinstance(obj, NumberElement): + return _parseNumber(<NumberElement>obj) + try: + # not always numeric, but Python will raise the right exception + return obj.pyval + except AttributeError: + pass + return obj + + +cdef _richcmpPyvals(left, right, int op): + left = getattr(left, 'pyval', left) + right = getattr(right, 'pyval', right) + return python.PyObject_RichCompare(left, right, op) + + +################################################################################ +# Python type registry + +cdef class PyType: + """PyType(self, name, type_check, type_class, stringify=None) + User defined type. + + Named type that contains a type check function, a type class that + inherits from ObjectifiedDataElement and an optional "stringification" + function. The type check must take a string as argument and raise + ValueError or TypeError if it cannot handle the string value. It may be + None in which case it is not considered for type guessing. For registered + named types, the 'stringify' function (or unicode() if None) is used to + convert a Python object with type name 'name' to the string representation + stored in the XML tree. + + Example:: + + PyType('int', int, MyIntClass).register() + + Note that the order in which types are registered matters. The first + matching type will be used. + """ + cdef readonly object name + cdef readonly object type_check + cdef readonly object stringify + cdef object _type + cdef list _schema_types + def __init__(self, name, type_check, type_class, stringify=None): + if isinstance(name, bytes): + name = (<bytes>name).decode('ascii') + elif not isinstance(name, unicode): + raise TypeError, "Type name must be a string" + if type_check is not None and not callable(type_check): + raise TypeError, "Type check function must be callable (or None)" + if name != TREE_PYTYPE_NAME and \ + not issubclass(type_class, ObjectifiedDataElement): + raise TypeError, \ + "Data classes must inherit from ObjectifiedDataElement" + self.name = name + self._type = type_class + self.type_check = type_check + if stringify is None: + stringify = unicode + self.stringify = stringify + self._schema_types = [] + + def __repr__(self): + return "PyType(%s, %s)" % (self.name, self._type.__name__) + + def register(self, before=None, after=None): + """register(self, before=None, after=None) + + Register the type. + + The additional keyword arguments 'before' and 'after' accept a + sequence of type names that must appear before/after the new type in + the type list. If any of them is not currently known, it is simply + ignored. Raises ValueError if the dependencies cannot be fulfilled. + """ + if self.name == TREE_PYTYPE_NAME: + raise ValueError, "Cannot register tree type" + if self.type_check is not None: + for item in _TYPE_CHECKS: + if item[0] is self.type_check: + _TYPE_CHECKS.remove(item) + break + entry = (self.type_check, self) + first_pos = 0 + last_pos = -1 + if before or after: + if before is None: + before = () + elif after is None: + after = () + for i, (check, pytype) in enumerate(_TYPE_CHECKS): + if last_pos == -1 and pytype.name in before: + last_pos = i + if pytype.name in after: + first_pos = i+1 + if last_pos == -1: + _TYPE_CHECKS.append(entry) + elif first_pos > last_pos: + raise ValueError, "inconsistent before/after dependencies" + else: + _TYPE_CHECKS.insert(last_pos, entry) + + _PYTYPE_DICT[self.name] = self + for xs_type in self._schema_types: + _SCHEMA_TYPE_DICT[xs_type] = self + + def unregister(self): + "unregister(self)" + if _PYTYPE_DICT.get(self.name) is self: + del _PYTYPE_DICT[self.name] + for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()): + if pytype is self: + del _SCHEMA_TYPE_DICT[xs_type] + if self.type_check is None: + return + try: + _TYPE_CHECKS.remove( (self.type_check, self) ) + except ValueError: + pass + + property xmlSchemaTypes: + """The list of XML Schema datatypes this Python type maps to. + + Note that this must be set before registering the type! + """ + def __get__(self): + return self._schema_types + def __set__(self, types): + self._schema_types = list(map(unicode, types)) + + +cdef dict _PYTYPE_DICT = {} +cdef dict _SCHEMA_TYPE_DICT = {} +cdef list _TYPE_CHECKS = [] + +cdef unicode _xml_bool(value): + return "true" if value else "false" + +cdef unicode _xml_float(value): + if _float_is_inf(value): + if value > 0: + return "INF" + return "-INF" + if _float_is_nan(value): + return "NaN" + return unicode(repr(value)) + +cdef _pytypename(obj): + return "str" if python._isString(obj) else _typename(obj) + +def pytypename(obj): + """pytypename(obj) + + Find the name of the corresponding PyType for a Python object. + """ + return _pytypename(obj) + +cdef _registerPyTypes(): + pytype = PyType('int', _checkInt, IntElement) # wraps functions for Python + pytype.xmlSchemaTypes = ("integer", "int", "short", "byte", "unsignedShort", + "unsignedByte", "nonPositiveInteger", + "negativeInteger", "long", "nonNegativeInteger", + "unsignedLong", "unsignedInt", "positiveInteger",) + pytype.register() + + # 'long' type just for backwards compatibility + pytype = PyType('long', None, IntElement) + pytype.register() + + pytype = PyType('float', _checkFloat, FloatElement, _xml_float) # wraps functions for Python + pytype.xmlSchemaTypes = ("double", "float") + pytype.register() + + pytype = PyType('bool', _checkBool, BoolElement, _xml_bool) # wraps functions for Python + pytype.xmlSchemaTypes = ("boolean",) + pytype.register() + + pytype = PyType('str', None, StringElement) + pytype.xmlSchemaTypes = ("string", "normalizedString", "token", "language", + "Name", "NCName", "ID", "IDREF", "ENTITY", + "NMTOKEN", ) + pytype.register() + + # since lxml 2.0 + pytype = PyType('NoneType', None, NoneElement) + pytype.register() + + # backwards compatibility + pytype = PyType('none', None, NoneElement) + pytype.register() + +# non-registered PyType for inner tree elements +cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement) + +_registerPyTypes() + +def getRegisteredTypes(): + """getRegisteredTypes() + + Returns a list of the currently registered PyType objects. + + To add a new type, retrieve this list and call unregister() for all + entries. Then add the new type at a suitable position (possibly replacing + an existing one) and call register() for all entries. + + This is necessary if the new type interferes with the type check functions + of existing ones (normally only int/float/bool) and must the tried before + other types. To add a type that is not yet parsable by the current type + check functions, you can simply register() it, which will append it to the + end of the type list. + """ + cdef list types = [] + cdef set known = set() + for check, pytype in _TYPE_CHECKS: + name = pytype.name + if name not in known: + known.add(name) + types.append(pytype) + for pytype in _PYTYPE_DICT.values(): + name = pytype.name + if name not in known: + known.add(name) + types.append(pytype) + return types + +cdef PyType _guessPyType(value, PyType defaulttype): + if value is None: + return None + for type_check, tested_pytype in _TYPE_CHECKS: + try: + type_check(value) + return <PyType>tested_pytype + except IGNORABLE_ERRORS: + # could not be parsed as the specified type => ignore + pass + return defaulttype + +cdef object _guessElementClass(tree.xmlNode* c_node): + value = textOf(c_node) + if value is None: + return None + if value == '': + return StringElement + + for type_check, pytype in _TYPE_CHECKS: + try: + type_check(value) + return (<PyType>pytype)._type + except IGNORABLE_ERRORS: + pass + return None + +################################################################################ +# adapted ElementMaker supports registered PyTypes + +@cython.final +@cython.internal +cdef class _ObjectifyElementMakerCaller: + cdef object _tag + cdef object _nsmap + cdef object _element_factory + cdef bint _annotate + + def __call__(self, *children, **attrib): + "__call__(self, *children, **attrib)" + cdef _ObjectifyElementMakerCaller elementMaker + cdef _Element element + cdef _Element childElement + cdef bint has_children + cdef bint has_string_value + if self._element_factory is None: + element = _makeElement(self._tag, None, attrib, self._nsmap) + else: + element = self._element_factory(self._tag, attrib, self._nsmap) + + pytype_name = None + has_children = False + has_string_value = False + for child in children: + if child is None: + if len(children) == 1: + cetree.setAttributeValue( + element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true") + elif python._isString(child): + _add_text(element, child) + has_string_value = True + elif isinstance(child, _Element): + cetree.appendChildToElement(element, <_Element>child) + has_children = True + elif isinstance(child, _ObjectifyElementMakerCaller): + elementMaker = <_ObjectifyElementMakerCaller>child + if elementMaker._element_factory is None: + cetree.makeSubElement(element, elementMaker._tag, + None, None, None, None) + else: + childElement = elementMaker._element_factory( + elementMaker._tag) + cetree.appendChildToElement(element, childElement) + has_children = True + elif isinstance(child, dict): + for name, value in child.items(): + # keyword arguments in attrib take precedence + if name in attrib: + continue + pytype = _PYTYPE_DICT.get(_typename(value)) + if pytype is not None: + value = (<PyType>pytype).stringify(value) + elif not python._isString(value): + value = unicode(value) + cetree.setAttributeValue(element, name, value) + else: + if pytype_name is not None: + # concatenation always makes the result a string + has_string_value = True + pytype_name = _typename(child) + pytype = _PYTYPE_DICT.get(_typename(child)) + if pytype is not None: + _add_text(element, (<PyType>pytype).stringify(child)) + else: + has_string_value = True + child = unicode(child) + _add_text(element, child) + + if self._annotate and not has_children: + if has_string_value: + cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str") + elif pytype_name is not None: + cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name) + + return element + +cdef _add_text(_Element elem, text): + # add text to the tree in construction, either as element text or + # tail text, depending on the current tree state + cdef tree.xmlNode* c_child + c_child = cetree.findChildBackwards(elem._c_node, 0) + if c_child is not NULL: + old = cetree.tailOf(c_child) + if old is not None: + text = old + text + cetree.setTailText(c_child, text) + else: + old = cetree.textOf(elem._c_node) + if old is not None: + text = old + text + cetree.setNodeText(elem._c_node, text) + +cdef class ElementMaker: + """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None) + + An ElementMaker that can be used for constructing trees. + + Example:: + + >>> M = ElementMaker(annotate=False) + >>> attributes = {'class': 'par'} + >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) ) + + >>> from lxml.etree import tostring + >>> print(tostring(html, method='html').decode('ascii')) + <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html> + + To create tags that are not valid Python identifiers, call the factory + directly and pass the tag name as first argument:: + + >>> root = M('tricky-tag', 'some text') + >>> print(root.tag) + tricky-tag + >>> print(root.text) + some text + + Note that this module has a predefined ElementMaker instance called ``E``. + """ + cdef object _makeelement + cdef object _namespace + cdef object _nsmap + cdef bint _annotate + cdef dict _cache + def __init__(self, *, namespace=None, nsmap=None, annotate=True, + makeelement=None): + if nsmap is None: + nsmap = _DEFAULT_NSMAP if annotate else {} + self._nsmap = nsmap + self._namespace = None if namespace is None else "{%s}" % namespace + self._annotate = annotate + if makeelement is not None: + if not callable(makeelement): + raise TypeError( + f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}") + self._makeelement = makeelement + else: + self._makeelement = None + self._cache = {} + + @cython.final + cdef _build_element_maker(self, tag, bint caching): + cdef _ObjectifyElementMakerCaller element_maker + element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller) + if self._namespace is not None and tag[0] != "{": + element_maker._tag = self._namespace + tag + else: + element_maker._tag = tag + element_maker._nsmap = self._nsmap + element_maker._annotate = self._annotate + element_maker._element_factory = self._makeelement + if caching: + if len(self._cache) > 200: + self._cache.clear() + self._cache[tag] = element_maker + return element_maker + + def __getattr__(self, tag): + element_maker = self._cache.get(tag) + if element_maker is None: + return self._build_element_maker(tag, caching=True) + return element_maker + + def __call__(self, tag, *args, **kwargs): + element_maker = self._cache.get(tag) + if element_maker is None: + element_maker = self._build_element_maker( + tag, caching=not is_special_method(tag)) + return element_maker(*args, **kwargs) + +################################################################################ +# Recursive element dumping + +cdef bint __RECURSIVE_STR = 0 # default: off + +def enable_recursive_str(on=True): + """enable_recursive_str(on=True) + + Enable a recursively generated tree representation for str(element), + based on objectify.dump(element). + """ + global __RECURSIVE_STR + __RECURSIVE_STR = on + +def dump(_Element element not None): + """dump(_Element element not None) + + Return a recursively generated string representation of an element. + """ + return _dump(element, 0) + +cdef object _dump(_Element element, int indent): + indentstr = " " * indent + if isinstance(element, ObjectifiedDataElement): + value = repr(element) + else: + value = textOf(element._c_node) + if value is not None: + if not value.strip(): + value = None + else: + value = repr(value) + result = f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n" + xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS + pytype_ns = "{%s}" % PYTYPE_NAMESPACE + for name, value in sorted(cetree.iterattributes(element, 3)): + if '{' in name: + if name == PYTYPE_ATTRIBUTE: + if value == TREE_PYTYPE_NAME: + continue + else: + name = name.replace(pytype_ns, 'py:') + name = name.replace(xsi_ns, 'xsi:') + result += f"{indentstr} * {name} = {value!r}\n" + + indent += 1 + for child in element.iterchildren(): + result += _dump(child, indent) + if indent == 1: + return result[:-1] # strip last '\n' + else: + return result + + +################################################################################ +# Pickle support for objectified ElementTree + +def __unpickleElementTree(data): + return etree.ElementTree(fromstring(data)) + +cdef _setupPickle(elementTreeReduceFunction): + import copyreg + copyreg.pickle(etree._ElementTree, + elementTreeReduceFunction, __unpickleElementTree) + +def pickleReduceElementTree(obj): + return __unpickleElementTree, (etree.tostring(obj),) + +_setupPickle(pickleReduceElementTree) +del pickleReduceElementTree + +################################################################################ +# Element class lookup + +cdef class ObjectifyElementClassLookup(ElementClassLookup): + """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None) + Element class lookup method that uses the objectify classes. + """ + cdef object empty_data_class + cdef object tree_class + def __init__(self, tree_class=None, empty_data_class=None): + """Lookup mechanism for objectify. + + The default Element classes can be replaced by passing subclasses of + ObjectifiedElement and ObjectifiedDataElement as keyword arguments. + 'tree_class' defines inner tree classes (defaults to + ObjectifiedElement), 'empty_data_class' defines the default class for + empty data elements (defaults to StringElement). + """ + self._lookup_function = _lookupElementClass + if tree_class is None: + tree_class = ObjectifiedElement + self.tree_class = tree_class + if empty_data_class is None: + empty_data_class = StringElement + self.empty_data_class = empty_data_class + +cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node): + cdef ObjectifyElementClassLookup lookup + lookup = <ObjectifyElementClassLookup>state + # if element has children => no data class + if cetree.hasChild(c_node): + return lookup.tree_class + + # if element is defined as xsi:nil, return NoneElement class + if "true" == cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil"): + return NoneElement + + # check for Python type hint + value = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + if value is not None: + if value == TREE_PYTYPE_NAME: + return lookup.tree_class + py_type = <PyType>_PYTYPE_DICT.get(value) + if py_type is not None: + return py_type._type + # unknown 'pyval' => try to figure it out ourself, just go on + + # check for XML Schema type hint + value = cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") + + if value is not None: + schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value) + if schema_type is None and ':' in value: + prefix, value = value.split(':', 1) + schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value) + if schema_type is not None: + return schema_type._type + + # otherwise determine class based on text content type + el_class = _guessElementClass(c_node) + if el_class is not None: + return el_class + + # if element is a root node => default to tree node + if c_node.parent is NULL or not tree._isElement(c_node.parent): + return lookup.tree_class + + return lookup.empty_data_class + + +################################################################################ +# Type annotations + +cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype): + if pytype is None: + return None + value = textOf(c_node) + try: + pytype.type_check(value) + return pytype + except IGNORABLE_ERRORS: + # could not be parsed as the specified type => ignore + pass + return None + +def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False, + empty_pytype=None): + """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None) + + Recursively annotates the elements of an XML tree with 'pytype' + attributes. + + If the 'ignore_old' keyword argument is True (the default), current 'pytype' + attributes will be ignored and replaced. Otherwise, they will be checked + and only replaced if they no longer fit the current text value. + + Setting the keyword argument ``ignore_xsi`` to True makes the function + additionally ignore existing ``xsi:type`` annotations. The default is to + use them as a type hint. + + The default annotation of empty elements can be set with the + ``empty_pytype`` keyword argument. The default is not to annotate empty + elements. Pass 'str', for example, to make string values the default. + """ + cdef _Element element + element = cetree.rootNodeOrRaise(element_or_tree) + _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype) + +def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False, + empty_type=None): + """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None) + + Recursively annotates the elements of an XML tree with 'xsi:type' + attributes. + + If the 'ignore_old' keyword argument is True (the default), current + 'xsi:type' attributes will be ignored and replaced. Otherwise, they will be + checked and only replaced if they no longer fit the current text value. + + Note that the mapping from Python types to XSI types is usually ambiguous. + Currently, only the first XSI type name in the corresponding PyType + definition will be used for annotation. Thus, you should consider naming + the widest type first if you define additional types. + + Setting the keyword argument ``ignore_pytype`` to True makes the function + additionally ignore existing ``pytype`` annotations. The default is to + use them as a type hint. + + The default annotation of empty elements can be set with the + ``empty_type`` keyword argument. The default is not to annotate empty + elements. Pass 'string', for example, to make string values the default. + """ + cdef _Element element + element = cetree.rootNodeOrRaise(element_or_tree) + _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None) + +def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False, + empty_pytype=None, empty_type=None, annotate_xsi=0, + annotate_pytype=1): + """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1) + + Recursively annotates the elements of an XML tree with 'xsi:type' + and/or 'py:pytype' attributes. + + If the 'ignore_old' keyword argument is True (the default), current + 'py:pytype' attributes will be ignored for the type annotation. Set to False + if you want reuse existing 'py:pytype' information (iff appropriate for the + element text value). + + If the 'ignore_xsi' keyword argument is False (the default), existing + 'xsi:type' attributes will be used for the type annotation, if they fit the + element text values. + + Note that the mapping from Python types to XSI types is usually ambiguous. + Currently, only the first XSI type name in the corresponding PyType + definition will be used for annotation. Thus, you should consider naming + the widest type first if you define additional types. + + The default 'py:pytype' annotation of empty elements can be set with the + ``empty_pytype`` keyword argument. Pass 'str', for example, to make + string values the default. + + The default 'xsi:type' annotation of empty elements can be set with the + ``empty_type`` keyword argument. The default is not to annotate empty + elements. Pass 'string', for example, to make string values the default. + + The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype' + (default: 1) control which kind(s) of annotation to use. + """ + cdef _Element element + element = cetree.rootNodeOrRaise(element_or_tree) + _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi, + ignore_old, empty_type, empty_pytype) + + +cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype, + bint ignore_xsi, bint ignore_pytype, + empty_type_name, empty_pytype_name): + cdef _Document doc + cdef tree.xmlNode* c_node + cdef PyType empty_pytype, StrType, NoneType + + if not annotate_xsi and not annotate_pytype: + return + + if empty_type_name is not None: + if isinstance(empty_type_name, bytes): + empty_type_name = (<bytes>empty_type_name).decode("ascii") + empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name) + elif empty_pytype_name is not None: + if isinstance(empty_pytype_name, bytes): + empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii") + empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name) + else: + empty_pytype = None + + StrType = <PyType>_PYTYPE_DICT.get('str') + NoneType = <PyType>_PYTYPE_DICT.get('NoneType') + + doc = element._doc + c_node = element._c_node + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + _annotate_element(c_node, doc, annotate_xsi, annotate_pytype, + ignore_xsi, ignore_pytype, + empty_type_name, empty_pytype, StrType, NoneType) + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + +cdef int _annotate_element(tree.xmlNode* c_node, _Document doc, + bint annotate_xsi, bint annotate_pytype, + bint ignore_xsi, bint ignore_pytype, + empty_type_name, PyType empty_pytype, + PyType StrType, PyType NoneType) except -1: + cdef tree.xmlNs* c_ns + cdef PyType pytype = None + typename = None + istree = 0 + + # if element is defined as xsi:nil, represent it as None + if cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true": + pytype = NoneType + + if pytype is None and not ignore_xsi: + # check that old xsi type value is valid + typename = cetree.attributeValueFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") + if typename is not None: + pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename) + if pytype is None and ':' in typename: + prefix, typename = typename.split(':', 1) + pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename) + if pytype is not None and pytype is not StrType: + # StrType does not have a typecheck but is the default + # anyway, so just accept it if given as type + # information + pytype = _check_type(c_node, pytype) + if pytype is None: + typename = None + + if pytype is None and not ignore_pytype: + # check that old pytype value is valid + old_pytypename = cetree.attributeValueFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + if old_pytypename is not None: + if old_pytypename == TREE_PYTYPE_NAME: + if not cetree.hasChild(c_node): + # only case where we should keep it, + # everything else is clear enough + pytype = TREE_PYTYPE + else: + if old_pytypename == 'none': + # transition from lxml 1.x + old_pytypename = "NoneType" + pytype = <PyType>_PYTYPE_DICT.get(old_pytypename) + if pytype is not None and pytype is not StrType: + # StrType does not have a typecheck but is the + # default anyway, so just accept it if given as + # type information + pytype = _check_type(c_node, pytype) + + if pytype is None: + # try to guess type + if not cetree.hasChild(c_node): + # element has no children => data class + pytype = _guessPyType(textOf(c_node), StrType) + else: + istree = 1 + + if pytype is None: + # use default type for empty elements + if cetree.hasText(c_node): + pytype = StrType + else: + pytype = empty_pytype + if typename is None: + typename = empty_type_name + + if pytype is not None: + if typename is None: + if not istree: + if pytype._schema_types: + # pytype->xsi:type is a 1:n mapping + # simply take the first + typename = pytype._schema_types[0] + elif typename not in pytype._schema_types: + typename = pytype._schema_types[0] + + if annotate_xsi: + if typename is None or istree: + cetree.delAttributeFromNsName( + c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type") + else: + # update or create attribute + typename_utf8 = cetree.utf8(typename) + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd') + if c_ns is not NULL: + if b':' in typename_utf8: + prefix, name = typename_utf8.split(b':', 1) + if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0': + typename_utf8 = name + elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0: + typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name + elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0': + typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8 + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi') + tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8)) + + if annotate_pytype: + if pytype is None: + # delete attribute if it exists + cetree.delAttributeFromNsName( + c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME) + else: + # update or create attribute + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py') + pytype_name = cetree.utf8(pytype.name) + tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME, + _xcstr(pytype_name)) + if pytype is NoneType: + c_ns = cetree.findOrBuildNodeNsPrefix( + doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi') + tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true") + + return 0 + +cdef object _strip_attributes = etree.strip_attributes +cdef object _cleanup_namespaces = etree.cleanup_namespaces + +def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True, + bint xsi_nil=False, bint cleanup_namespaces=False): + """deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False) + + Recursively de-annotate the elements of an XML tree by removing 'py:pytype' + and/or 'xsi:type' attributes and/or 'xsi:nil' attributes. + + If the 'pytype' keyword argument is True (the default), 'py:pytype' + attributes will be removed. If the 'xsi' keyword argument is True (the + default), 'xsi:type' attributes will be removed. + If the 'xsi_nil' keyword argument is True (default: False), 'xsi:nil' + attributes will be removed. + + Note that this does not touch the namespace declarations by + default. If you want to remove unused namespace declarations from + the tree, pass the option ``cleanup_namespaces=True``. + """ + cdef list attribute_names = [] + + if pytype: + attribute_names.append(PYTYPE_ATTRIBUTE) + if xsi: + attribute_names.append(XML_SCHEMA_INSTANCE_TYPE_ATTR) + if xsi_nil: + attribute_names.append(XML_SCHEMA_INSTANCE_NIL_ATTR) + + _strip_attributes(element_or_tree, *attribute_names) + if cleanup_namespaces: + _cleanup_namespaces(element_or_tree) + +################################################################################ +# Module level parser setup + +cdef object __DEFAULT_PARSER +__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True) +__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() ) + +cdef object objectify_parser +objectify_parser = __DEFAULT_PARSER + +def set_default_parser(new_parser = None): + """set_default_parser(new_parser = None) + + Replace the default parser used by objectify's Element() and + fromstring() functions. + + The new parser must be an etree.XMLParser. + + Call without arguments to reset to the original parser. + """ + global objectify_parser + if new_parser is None: + objectify_parser = __DEFAULT_PARSER + elif isinstance(new_parser, etree.XMLParser): + objectify_parser = new_parser + else: + raise TypeError, "parser must inherit from lxml.etree.XMLParser" + +def makeparser(**kw): + """makeparser(remove_blank_text=True, **kw) + + Create a new XML parser for objectify trees. + + You can pass all keyword arguments that are supported by + ``etree.XMLParser()``. Note that this parser defaults to removing + blank text. You can disable this by passing the + ``remove_blank_text`` boolean keyword option yourself. + """ + if 'remove_blank_text' not in kw: + kw['remove_blank_text'] = True + parser = etree.XMLParser(**kw) + parser.set_element_class_lookup( ObjectifyElementClassLookup() ) + return parser + +cdef _Element _makeElement(tag, text, attrib, nsmap): + return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap) + +################################################################################ +# Module level factory functions + +cdef object _fromstring +_fromstring = etree.fromstring + +SubElement = etree.SubElement + +def fromstring(xml, parser=None, *, base_url=None): + """fromstring(xml, parser=None, base_url=None) + + Objectify specific version of the lxml.etree fromstring() function + that uses the objectify parser. + + You can pass a different parser as second argument. + + The ``base_url`` keyword argument allows to set the original base URL of + the document to support relative Paths when looking up external entities + (DTD, XInclude, ...). + """ + if parser is None: + parser = objectify_parser + return _fromstring(xml, parser, base_url=base_url) + +def XML(xml, parser=None, *, base_url=None): + """XML(xml, parser=None, base_url=None) + + Objectify specific version of the lxml.etree XML() literal factory + that uses the objectify parser. + + You can pass a different parser as second argument. + + The ``base_url`` keyword argument allows to set the original base URL of + the document to support relative Paths when looking up external entities + (DTD, XInclude, ...). + """ + if parser is None: + parser = objectify_parser + return _fromstring(xml, parser, base_url=base_url) + +cdef object _parse +_parse = etree.parse + +def parse(f, parser=None, *, base_url=None): + """parse(f, parser=None, base_url=None) + + Parse a file or file-like object with the objectify parser. + + You can pass a different parser as second argument. + + The ``base_url`` keyword allows setting a URL for the document + when parsing from a file-like object. This is needed when looking + up external entities (DTD, XInclude, ...) with relative paths. + """ + if parser is None: + parser = objectify_parser + return _parse(f, parser, base_url=base_url) + +cdef dict _DEFAULT_NSMAP = { + "py" : PYTYPE_NAMESPACE, + "xsi" : XML_SCHEMA_INSTANCE_NS, + "xsd" : XML_SCHEMA_NS +} + +E = ElementMaker() + +def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes): + """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes) + + Objectify specific version of the lxml.etree Element() factory that + always creates a structural (tree) element. + + NOTE: requires parser based element class lookup activated in lxml.etree! + """ + if attrib is not None: + if _attributes: + attrib = dict(attrib) + attrib.update(_attributes) + _attributes = attrib + if _pytype is None: + _pytype = TREE_PYTYPE_NAME + if nsmap is None: + nsmap = _DEFAULT_NSMAP + _attributes[PYTYPE_ATTRIBUTE] = _pytype + return _makeElement(_tag, None, _attributes, nsmap) + +def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None, + **_attributes): + """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes) + + Create a new element from a Python value and XML attributes taken from + keyword arguments or a dictionary passed as second argument. + + Automatically adds a 'pytype' attribute for the Python type of the value, + if the type can be identified. If '_pytype' or '_xsi' are among the + keyword arguments, they will be used instead. + + If the _value argument is an ObjectifiedDataElement instance, its py:pytype, + xsi:type and other attributes and nsmap are reused unless they are redefined + in attrib and/or keyword arguments. + """ + if nsmap is None: + nsmap = _DEFAULT_NSMAP + if attrib is not None and attrib: + if _attributes: + attrib = dict(attrib) + attrib.update(_attributes) + _attributes = attrib + if isinstance(_value, ObjectifiedElement): + if _pytype is None: + if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP: + # special case: no change! + return _value.__copy__() + if isinstance(_value, ObjectifiedDataElement): + # reuse existing nsmap unless redefined in nsmap parameter + temp = _value.nsmap + if temp is not None and temp: + temp = dict(temp) + temp.update(nsmap) + nsmap = temp + # reuse existing attributes unless redefined in attrib/_attributes + temp = _value.attrib + if temp is not None and temp: + temp = dict(temp) + temp.update(_attributes) + _attributes = temp + # reuse existing xsi:type or py:pytype attributes, unless provided as + # arguments + if _xsi is None and _pytype is None: + _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR) + _pytype = _attributes.get(PYTYPE_ATTRIBUTE) + + if _xsi is not None: + if ':' in _xsi: + prefix, name = _xsi.split(':', 1) + ns = nsmap.get(prefix) + if ns != XML_SCHEMA_NS: + raise ValueError, "XSD types require the XSD namespace" + elif nsmap is _DEFAULT_NSMAP: + name = _xsi + _xsi = 'xsd:' + _xsi + else: + name = _xsi + for prefix, ns in nsmap.items(): + if ns == XML_SCHEMA_NS: + if prefix is not None and prefix: + _xsi = prefix + ':' + _xsi + break + else: + raise ValueError, "XSD types require the XSD namespace" + _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi + if _pytype is None: + # allow using unregistered or even wrong xsi:type names + py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi) + if py_type is None: + py_type = <PyType>_SCHEMA_TYPE_DICT.get(name) + if py_type is not None: + _pytype = py_type.name + + if _pytype is None: + _pytype = _pytypename(_value) + + if _value is None and _pytype != "str": + _pytype = _pytype or "NoneType" + strval = None + elif python._isString(_value): + strval = _value + elif isinstance(_value, bool): + if _value: + strval = "true" + else: + strval = "false" + else: + py_type = <PyType>_PYTYPE_DICT.get(_pytype) + stringify = unicode if py_type is None else py_type.stringify + strval = stringify(_value) + + if _pytype is not None: + if _pytype == "NoneType" or _pytype == "none": + strval = None + _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true" + else: + # check if type information from arguments is valid + py_type = <PyType>_PYTYPE_DICT.get(_pytype) + if py_type is not None: + if py_type.type_check is not None: + py_type.type_check(strval) + _attributes[PYTYPE_ATTRIBUTE] = _pytype + + return _makeElement("value", strval, _attributes, nsmap) + + +################################################################################ +# ObjectPath + +include "objectpath.pxi" diff --git a/.venv/lib/python3.12/site-packages/lxml/objectpath.pxi b/.venv/lib/python3.12/site-packages/lxml/objectpath.pxi new file mode 100644 index 00000000..e562a365 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/objectpath.pxi @@ -0,0 +1,332 @@ +################################################################################ +# ObjectPath + +ctypedef struct _ObjectPath: + const_xmlChar* href + const_xmlChar* name + Py_ssize_t index + + +cdef object _NO_DEFAULT = object() + + +cdef class ObjectPath: + """ObjectPath(path) + Immutable object that represents a compiled object path. + + Example for a path: 'root.child[1].{other}child[25]' + """ + cdef readonly object find + cdef list _path + cdef object _path_str + cdef _ObjectPath* _c_path + cdef Py_ssize_t _path_len + def __init__(self, path): + if python._isString(path): + self._path = _parse_object_path_string(path) + self._path_str = path + else: + self._path = _parse_object_path_list(path) + self._path_str = '.'.join(path) + self._path_len = len(self._path) + self._c_path = _build_object_path_segments(self._path) + self.find = self.__call__ + + def __dealloc__(self): + if self._c_path is not NULL: + python.lxml_free(self._c_path) + + def __str__(self): + return self._path_str + + def __call__(self, _Element root not None, *_default): + """Follow the attribute path in the object structure and return the + target attribute value. + + If it it not found, either returns a default value (if one was passed + as second argument) or raises AttributeError. + """ + if _default: + if len(_default) > 1: + raise TypeError, "invalid number of arguments: needs one or two" + default = _default[0] + else: + default = _NO_DEFAULT + return _find_object_path(root, self._c_path, self._path_len, default) + + def hasattr(self, _Element root not None): + "hasattr(self, root)" + try: + _find_object_path(root, self._c_path, self._path_len, _NO_DEFAULT) + except AttributeError: + return False + return True + + def setattr(self, _Element root not None, value): + """setattr(self, root, value) + + Set the value of the target element in a subtree. + + If any of the children on the path does not exist, it is created. + """ + _create_object_path(root, self._c_path, self._path_len, 1, value) + + def addattr(self, _Element root not None, value): + """addattr(self, root, value) + + Append a value to the target element in a subtree. + + If any of the children on the path does not exist, it is created. + """ + _create_object_path(root, self._c_path, self._path_len, 0, value) + + +cdef object __MATCH_PATH_SEGMENT = re.compile( + r"(\.?)\s*(?:\{([^}]*)\})?\s*([^.{}\[\]\s]+)\s*(?:\[\s*([-0-9]+)\s*\])?", + re.U).match + +cdef tuple _RELATIVE_PATH_SEGMENT = (None, None, 0) + + +cdef list _parse_object_path_string(_path): + """Parse object path string into a (ns, name, index) list. + """ + cdef bint has_dot + cdef unicode path + new_path = [] + if isinstance(_path, bytes): + path = (<bytes>_path).decode('ascii') + elif type(_path) is not unicode: + path = unicode(_path) + else: + path = _path + path = path.strip() + if path == '.': + return [_RELATIVE_PATH_SEGMENT] + path_pos = 0 + while path: + match = __MATCH_PATH_SEGMENT(path, path_pos) + if match is None: + break + + dot, ns, name, index = match.groups() + index = int(index) if index else 0 + has_dot = dot == '.' + if not new_path: + if has_dot: + # path '.child' => ignore root + new_path.append(_RELATIVE_PATH_SEGMENT) + elif index: + raise ValueError, "index not allowed on root node" + elif not has_dot: + raise ValueError, "invalid path" + if ns is not None: + ns = python.PyUnicode_AsUTF8String(ns) + name = python.PyUnicode_AsUTF8String(name) + new_path.append( (ns, name, index) ) + + path_pos = match.end() + if not new_path or len(path) > path_pos: + raise ValueError, "invalid path" + return new_path + + +cdef list _parse_object_path_list(path): + """Parse object path sequence into a (ns, name, index) list. + """ + new_path = [] + for item in path: + item = item.strip() + if not new_path and item == '': + # path '.child' => ignore root + ns = name = None + index = 0 + else: + ns, name = cetree.getNsTag(item) + c_name = _xcstr(name) + index_pos = tree.xmlStrchr(c_name, c'[') + if index_pos is NULL: + index = 0 + else: + index_end = tree.xmlStrchr(index_pos + 1, c']') + if index_end is NULL: + raise ValueError, "index must be enclosed in []" + index = int(index_pos[1:index_end - index_pos]) + if not new_path and index != 0: + raise ValueError, "index not allowed on root node" + name = <bytes>c_name[:index_pos - c_name] + new_path.append( (ns, name, index) ) + if not new_path: + raise ValueError, "invalid path" + return new_path + + +cdef _ObjectPath* _build_object_path_segments(list path_list) except NULL: + cdef _ObjectPath* c_path + cdef _ObjectPath* c_path_segments + c_path_segments = <_ObjectPath*>python.lxml_malloc(len(path_list), sizeof(_ObjectPath)) + if c_path_segments is NULL: + raise MemoryError() + c_path = c_path_segments + for href, name, index in path_list: + c_path[0].href = _xcstr(href) if href is not None else NULL + c_path[0].name = _xcstr(name) if name is not None else NULL + c_path[0].index = index + c_path += 1 + return c_path_segments + + +cdef _find_object_path(_Element root, _ObjectPath* c_path, Py_ssize_t c_path_len, default_value): + """Follow the path to find the target element. + """ + cdef tree.xmlNode* c_node + cdef Py_ssize_t c_index + c_node = root._c_node + c_name = c_path[0].name + c_href = c_path[0].href + if c_href is NULL or c_href[0] == c'\0': + c_href = tree._getNs(c_node) + if not cetree.tagMatches(c_node, c_href, c_name): + if default_value is not _NO_DEFAULT: + return default_value + else: + raise ValueError( + f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}") + + while c_node is not NULL: + c_path_len -= 1 + if c_path_len <= 0: + break + + c_path += 1 + if c_path[0].href is not NULL: + c_href = c_path[0].href # otherwise: keep parent namespace + c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1) + if c_name is NULL: + c_name = c_path[0].name + c_node = NULL + break + c_index = c_path[0].index + c_node = c_node.last if c_index < 0 else c_node.children + c_node = _findFollowingSibling(c_node, c_href, c_name, c_index) + + if c_node is not NULL: + return cetree.elementFactory(root._doc, c_node) + elif default_value is not _NO_DEFAULT: + return default_value + else: + tag = cetree.namespacedNameFromNsName(c_href, c_name) + raise AttributeError, f"no such child: {tag}" + + +cdef _create_object_path(_Element root, _ObjectPath* c_path, + Py_ssize_t c_path_len, int replace, value): + """Follow the path to find the target element, build the missing children + as needed and set the target element to 'value'. If replace is true, an + existing value is replaced, otherwise the new value is added. + """ + cdef _Element child + cdef tree.xmlNode* c_node + cdef tree.xmlNode* c_child + cdef Py_ssize_t c_index + if c_path_len == 1: + raise TypeError, "cannot update root node" + + c_node = root._c_node + c_name = c_path[0].name + c_href = c_path[0].href + if c_href is NULL or c_href[0] == c'\0': + c_href = tree._getNs(c_node) + if not cetree.tagMatches(c_node, c_href, c_name): + raise ValueError( + f"root element does not match: need {cetree.namespacedNameFromNsName(c_href, c_name)}, got {root.tag}") + + while c_path_len > 1: + c_path_len -= 1 + c_path += 1 + if c_path[0].href is not NULL: + c_href = c_path[0].href # otherwise: keep parent namespace + c_index = c_path[0].index + c_name = tree.xmlDictExists(c_node.doc.dict, c_path[0].name, -1) + if c_name is NULL: + c_name = c_path[0].name + c_child = NULL + else: + c_child = c_node.last if c_index < 0 else c_node.children + c_child = _findFollowingSibling(c_child, c_href, c_name, c_index) + + if c_child is not NULL: + c_node = c_child + elif c_index != 0: + raise TypeError, "creating indexed path attributes is not supported" + elif c_path_len == 1: + _appendValue(cetree.elementFactory(root._doc, c_node), + cetree.namespacedNameFromNsName(c_href, c_name), + value) + return + else: + child = cetree.makeSubElement( + cetree.elementFactory(root._doc, c_node), + cetree.namespacedNameFromNsName(c_href, c_name), + None, None, None, None) + c_node = child._c_node + + # if we get here, the entire path was already there + if replace: + element = cetree.elementFactory(root._doc, c_node) + _replaceElement(element, value) + else: + _appendValue(cetree.elementFactory(root._doc, c_node.parent), + cetree.namespacedName(c_node), value) + + +cdef list _build_descendant_paths(tree.xmlNode* c_node, prefix_string): + """Returns a list of all descendant paths. + """ + cdef list path, path_list + tag = cetree.namespacedName(c_node) + if prefix_string: + if prefix_string[-1] != '.': + prefix_string += '.' + prefix_string = prefix_string + tag + else: + prefix_string = tag + path = [prefix_string] + path_list = [] + _recursive_build_descendant_paths(c_node, path, path_list) + return path_list + + +cdef int _recursive_build_descendant_paths(tree.xmlNode* c_node, + list path, list path_list) except -1: + """Fills the list 'path_list' with all descendant paths, initial prefix + being in the list 'path'. + """ + cdef tree.xmlNode* c_child + tags = {} + path_list.append('.'.join(path)) + c_href = tree._getNs(c_node) + c_child = c_node.children + while c_child is not NULL: + while c_child.type != tree.XML_ELEMENT_NODE: + c_child = c_child.next + if c_child is NULL: + return 0 + if c_href is tree._getNs(c_child): + tag = pyunicode(c_child.name) + elif c_href is not NULL and tree._getNs(c_child) is NULL: + # special case: parent has namespace, child does not + tag = '{}' + pyunicode(c_child.name) + else: + tag = cetree.namespacedName(c_child) + count = tags.get(tag) + if count is None: + tags[tag] = 1 + else: + tags[tag] = count + 1 + tag += f'[{count}]' + path.append(tag) + _recursive_build_descendant_paths(c_child, path, path_list) + del path[-1] + c_child = c_child.next + return 0 diff --git a/.venv/lib/python3.12/site-packages/lxml/parser.pxi b/.venv/lib/python3.12/site-packages/lxml/parser.pxi new file mode 100644 index 00000000..70337d87 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/parser.pxi @@ -0,0 +1,2000 @@ +# Parsers for XML and HTML + +from lxml.includes cimport xmlparser +from lxml.includes cimport htmlparser + + +class ParseError(LxmlSyntaxError): + """Syntax error while parsing an XML document. + + For compatibility with ElementTree 1.3 and later. + """ + def __init__(self, message, code, line, column, filename=None): + super(_ParseError, self).__init__(message) + self.lineno, self.offset = (line, column - 1) + self.code = code + self.filename = filename + + @property + def position(self): + return self.lineno, self.offset + 1 + + @position.setter + def position(self, new_pos): + self.lineno, column = new_pos + self.offset = column - 1 + +cdef object _ParseError = ParseError + + +class XMLSyntaxError(ParseError): + """Syntax error while parsing an XML document. + """ + +cdef class ParserError(LxmlError): + """Internal lxml parser error. + """ + + +@cython.final +@cython.internal +cdef class _ParserDictionaryContext: + # Global parser context to share the string dictionary. + # + # This class is a delegate singleton! + # + # It creates _ParserDictionaryContext objects for each thread to keep thread state, + # but those must never be used directly. Always stick to using the static + # __GLOBAL_PARSER_CONTEXT as defined below the class. + # + + cdef tree.xmlDict* _c_dict + cdef _BaseParser _default_parser + cdef list _implied_parser_contexts + + def __cinit__(self): + self._c_dict = NULL + self._implied_parser_contexts = [] + + def __dealloc__(self): + if self._c_dict is not NULL: + xmlparser.xmlDictFree(self._c_dict) + + cdef int initMainParserContext(self) except -1: + """Put the global context into the thread dictionary of the main + thread. To be called once and only in the main thread.""" + thread_dict = python.PyThreadState_GetDict() + if thread_dict is not NULL: + (<dict>thread_dict)["_ParserDictionaryContext"] = self + + cdef _ParserDictionaryContext _findThreadParserContext(self): + "Find (or create) the _ParserDictionaryContext object for the current thread" + cdef _ParserDictionaryContext context + thread_dict = python.PyThreadState_GetDict() + if thread_dict is NULL: + return self + d = <dict>thread_dict + result = python.PyDict_GetItem(d, "_ParserDictionaryContext") + if result is not NULL: + return <object>result + context = <_ParserDictionaryContext>_ParserDictionaryContext.__new__(_ParserDictionaryContext) + d["_ParserDictionaryContext"] = context + return context + + cdef int setDefaultParser(self, _BaseParser parser) except -1: + "Set the default parser for the current thread" + cdef _ParserDictionaryContext context + context = self._findThreadParserContext() + context._default_parser = parser + + cdef _BaseParser getDefaultParser(self): + "Return (or create) the default parser of the current thread" + cdef _ParserDictionaryContext context + context = self._findThreadParserContext() + if context._default_parser is None: + if self._default_parser is None: + self._default_parser = __DEFAULT_XML_PARSER._copy() + if context is not self: + context._default_parser = self._default_parser._copy() + return context._default_parser + + cdef tree.xmlDict* _getThreadDict(self, tree.xmlDict* default): + "Return the thread-local dict or create a new one if necessary." + cdef _ParserDictionaryContext context + context = self._findThreadParserContext() + if context._c_dict is NULL: + # thread dict not yet set up => use default or create a new one + if default is not NULL: + context._c_dict = default + xmlparser.xmlDictReference(default) + return default + if self._c_dict is NULL: + self._c_dict = xmlparser.xmlDictCreate() + if context is not self: + context._c_dict = xmlparser.xmlDictCreateSub(self._c_dict) + return context._c_dict + + cdef int initThreadDictRef(self, tree.xmlDict** c_dict_ref) except -1: + c_dict = c_dict_ref[0] + c_thread_dict = self._getThreadDict(c_dict) + if c_dict is c_thread_dict: + return 0 + if c_dict is not NULL: + xmlparser.xmlDictFree(c_dict) + c_dict_ref[0] = c_thread_dict + xmlparser.xmlDictReference(c_thread_dict) + + cdef int initParserDict(self, xmlparser.xmlParserCtxt* pctxt) except -1: + "Assure we always use the same string dictionary." + self.initThreadDictRef(&pctxt.dict) + pctxt.dictNames = 1 + + cdef int initXPathParserDict(self, xpath.xmlXPathContext* pctxt) except -1: + "Assure we always use the same string dictionary." + self.initThreadDictRef(&pctxt.dict) + + cdef int initDocDict(self, xmlDoc* result) except -1: + "Store dict of last object parsed if no shared dict yet" + # XXX We also free the result dict here if there already was one. + # This case should only occur for new documents with empty dicts, + # otherwise we'd free data that's in use => segfault + self.initThreadDictRef(&result.dict) + + cdef _ParserContext findImpliedContext(self): + """Return any current implied xml parser context for the current + thread. This is used when the resolver functions are called + with an xmlParserCtxt that was generated from within libxml2 + (i.e. without a _ParserContext) - which happens when parsing + schema and xinclude external references.""" + cdef _ParserDictionaryContext context + cdef _ParserContext implied_context + + # see if we have a current implied parser + context = self._findThreadParserContext() + if context._implied_parser_contexts: + implied_context = context._implied_parser_contexts[-1] + return implied_context + return None + + cdef int pushImpliedContextFromParser(self, _BaseParser parser) except -1: + "Push a new implied context object taken from the parser." + if parser is not None: + self.pushImpliedContext(parser._getParserContext()) + else: + self.pushImpliedContext(None) + + cdef int pushImpliedContext(self, _ParserContext parser_context) except -1: + "Push a new implied context object." + cdef _ParserDictionaryContext context + context = self._findThreadParserContext() + context._implied_parser_contexts.append(parser_context) + + cdef int popImpliedContext(self) except -1: + "Pop the current implied context object." + cdef _ParserDictionaryContext context + context = self._findThreadParserContext() + context._implied_parser_contexts.pop() + +cdef _ParserDictionaryContext __GLOBAL_PARSER_CONTEXT = _ParserDictionaryContext() +__GLOBAL_PARSER_CONTEXT.initMainParserContext() + +############################################################ +## support for Python unicode I/O +############################################################ + +# name of Python Py_UNICODE encoding as known to libxml2 +cdef const_char* _PY_UNICODE_ENCODING = NULL + +cdef int _setupPythonUnicode() except -1: + """Sets _PY_UNICODE_ENCODING to the internal encoding name of Python unicode + strings if libxml2 supports reading native Python unicode. This depends + on iconv and the local Python installation, so we simply check if we find + a matching encoding handler. + """ + cdef tree.xmlCharEncodingHandler* enchandler + cdef Py_ssize_t l + cdef const_char* enc + cdef Py_UNICODE *uchars = [c'<', c't', c'e', c's', c't', c'/', c'>'] + cdef const_xmlChar* buffer = <const_xmlChar*>uchars + # apparently, libxml2 can't detect UTF-16 on some systems + if (buffer[0] == c'<' and buffer[1] == c'\0' and + buffer[2] == c't' and buffer[3] == c'\0'): + enc = "UTF-16LE" + elif (buffer[0] == c'\0' and buffer[1] == c'<' and + buffer[2] == c'\0' and buffer[3] == c't'): + enc = "UTF-16BE" + else: + # let libxml2 give it a try + enc = _findEncodingName(buffer, sizeof(Py_UNICODE) * 7) + if enc is NULL: + # not my fault, it's YOUR broken system :) + return 0 + enchandler = tree.xmlFindCharEncodingHandler(enc) + if enchandler is not NULL: + global _PY_UNICODE_ENCODING + tree.xmlCharEncCloseFunc(enchandler) + _PY_UNICODE_ENCODING = enc + return 0 + +cdef const_char* _findEncodingName(const_xmlChar* buffer, int size): + "Work around bug in libxml2: find iconv name of encoding on our own." + cdef tree.xmlCharEncoding enc + enc = tree.xmlDetectCharEncoding(buffer, size) + if enc == tree.XML_CHAR_ENCODING_UTF16LE: + if size >= 4 and (buffer[0] == <const_xmlChar> b'\xFF' and + buffer[1] == <const_xmlChar> b'\xFE' and + buffer[2] == 0 and buffer[3] == 0): + return "UTF-32LE" # according to BOM + else: + return "UTF-16LE" + elif enc == tree.XML_CHAR_ENCODING_UTF16BE: + return "UTF-16BE" + elif enc == tree.XML_CHAR_ENCODING_UCS4LE: + return "UCS-4LE" + elif enc == tree.XML_CHAR_ENCODING_UCS4BE: + return "UCS-4BE" + elif enc == tree.XML_CHAR_ENCODING_NONE: + return NULL + else: + # returns a constant char*, no need to free it + return tree.xmlGetCharEncodingName(enc) + +# Python 3.12 removed support for "Py_UNICODE". +if python.PY_VERSION_HEX < 0x030C0000: + _setupPythonUnicode() + + +cdef unicode _find_PyUCS4EncodingName(): + """ + Find a suitable encoding for Py_UCS4 PyUnicode strings in libxml2. + """ + ustring = "<xml>\U0001F92A</xml>" + cdef const xmlChar* buffer = <const xmlChar*> python.PyUnicode_DATA(ustring) + cdef Py_ssize_t py_buffer_len = python.PyUnicode_GET_LENGTH(ustring) + + encoding_name = '' + cdef tree.xmlCharEncoding enc = tree.xmlDetectCharEncoding(buffer, py_buffer_len) + enchandler = tree.xmlGetCharEncodingHandler(enc) + if enchandler is not NULL: + try: + if enchandler.name: + encoding_name = enchandler.name.decode('UTF-8') + finally: + tree.xmlCharEncCloseFunc(enchandler) + else: + c_name = tree.xmlGetCharEncodingName(enc) + if c_name: + encoding_name = c_name.decode('UTF-8') + + + if encoding_name and not encoding_name.endswith('LE') and not encoding_name.endswith('BE'): + encoding_name += 'BE' if python.PY_BIG_ENDIAN else 'LE' + return encoding_name or None + +_pyucs4_encoding_name = _find_PyUCS4EncodingName() + + +############################################################ +## support for file-like objects +############################################################ + +@cython.final +@cython.internal +cdef class _FileReaderContext: + cdef object _filelike + cdef object _encoding + cdef object _url + cdef object _bytes + cdef _ExceptionContext _exc_context + cdef Py_ssize_t _bytes_read + cdef char* _c_url + cdef bint _close_file_after_read + + def __cinit__(self, filelike, exc_context not None, url, encoding=None, bint close_file=False): + self._exc_context = exc_context + self._filelike = filelike + self._close_file_after_read = close_file + self._encoding = encoding + if url is None: + self._c_url = NULL + else: + url = _encodeFilename(url) + self._c_url = _cstr(url) + self._url = url + self._bytes = b'' + self._bytes_read = 0 + + cdef _close_file(self): + if self._filelike is None or not self._close_file_after_read: + return + try: + close = self._filelike.close + except AttributeError: + close = None + finally: + self._filelike = None + if close is not None: + close() + + cdef xmlparser.xmlParserInputBuffer* _createParserInputBuffer(self) noexcept: + cdef xmlparser.xmlParserInputBuffer* c_buffer = xmlparser.xmlAllocParserInputBuffer(0) + if c_buffer: + c_buffer.readcallback = _readFilelikeParser + c_buffer.context = <python.PyObject*> self + return c_buffer + + cdef xmlparser.xmlParserInput* _createParserInput( + self, xmlparser.xmlParserCtxt* ctxt) noexcept: + cdef xmlparser.xmlParserInputBuffer* c_buffer = self._createParserInputBuffer() + if not c_buffer: + return NULL + return xmlparser.xmlNewIOInputStream(ctxt, c_buffer, 0) + + cdef tree.xmlDtd* _readDtd(self) noexcept: + cdef xmlparser.xmlParserInputBuffer* c_buffer = self._createParserInputBuffer() + if not c_buffer: + return NULL + with nogil: + return xmlparser.xmlIOParseDTD(NULL, c_buffer, 0) + + cdef xmlDoc* _readDoc(self, xmlparser.xmlParserCtxt* ctxt, int options) noexcept: + cdef xmlDoc* result + cdef void* c_callback_context = <python.PyObject*> self + cdef char* c_encoding = _cstr(self._encoding) if self._encoding is not None else NULL + + orig_options = ctxt.options + with nogil: + if ctxt.html: + result = htmlparser.htmlCtxtReadIO( + ctxt, _readFilelikeParser, NULL, c_callback_context, + self._c_url, c_encoding, options) + if result is not NULL: + if _fixHtmlDictNames(ctxt.dict, result) < 0: + tree.xmlFreeDoc(result) + result = NULL + else: + result = xmlparser.xmlCtxtReadIO( + ctxt, _readFilelikeParser, NULL, c_callback_context, + self._c_url, c_encoding, options) + ctxt.options = orig_options # work around libxml2 problem + + try: + self._close_file() + except: + self._exc_context._store_raised() + finally: + return result # swallow any exceptions + + cdef int copyToBuffer(self, char* c_buffer, int c_requested) noexcept: + cdef int c_byte_count = 0 + cdef char* c_start + cdef Py_ssize_t byte_count, remaining + if self._bytes_read < 0: + return 0 + try: + byte_count = python.PyBytes_GET_SIZE(self._bytes) + remaining = byte_count - self._bytes_read + while c_requested > remaining: + c_start = _cstr(self._bytes) + self._bytes_read + cstring_h.memcpy(c_buffer, c_start, remaining) + c_byte_count += remaining + c_buffer += remaining + c_requested -= remaining + + self._bytes = self._filelike.read(c_requested) + if not isinstance(self._bytes, bytes): + if isinstance(self._bytes, unicode): + if self._encoding is None: + self._bytes = (<unicode>self._bytes).encode('utf8') + else: + self._bytes = python.PyUnicode_AsEncodedString( + self._bytes, _cstr(self._encoding), NULL) + else: + self._close_file() + raise TypeError, \ + "reading from file-like objects must return byte strings or unicode strings" + + remaining = python.PyBytes_GET_SIZE(self._bytes) + if remaining == 0: + self._bytes_read = -1 + self._close_file() + return c_byte_count + self._bytes_read = 0 + + if c_requested > 0: + c_start = _cstr(self._bytes) + self._bytes_read + cstring_h.memcpy(c_buffer, c_start, c_requested) + c_byte_count += c_requested + self._bytes_read += c_requested + except: + c_byte_count = -1 + self._exc_context._store_raised() + try: + self._close_file() + except: + self._exc_context._store_raised() + finally: + return c_byte_count # swallow any exceptions + +cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) noexcept with gil: + return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size) + +cdef int _readFileParser(void* ctxt, char* c_buffer, int c_size) noexcept nogil: + return stdio.fread(c_buffer, 1, c_size, <stdio.FILE*>ctxt) + +############################################################ +## support for custom document loaders +############################################################ + +cdef xmlparser.xmlParserInput* _local_resolver(const_char* c_url, const_char* c_pubid, + xmlparser.xmlParserCtxt* c_context) noexcept with gil: + cdef _ResolverContext context + cdef xmlparser.xmlParserInput* c_input + cdef _InputDocument doc_ref + cdef _FileReaderContext file_context + # if there is no _ParserContext associated with the xmlParserCtxt + # passed, check to see if the thread state object has an implied + # context. + if c_context._private is not NULL: + context = <_ResolverContext>c_context._private + else: + context = __GLOBAL_PARSER_CONTEXT.findImpliedContext() + + if context is None: + if __DEFAULT_ENTITY_LOADER is NULL: + return NULL + with nogil: + # free the GIL as we might do serious I/O here (e.g. HTTP) + c_input = __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + return c_input + + try: + if c_url is NULL: + url = None + else: + # parsing a related document (DTD etc.) => UTF-8 encoded URL? + url = _decodeFilename(<const_xmlChar*>c_url) + if c_pubid is NULL: + pubid = None + else: + pubid = funicode(<const_xmlChar*>c_pubid) # always UTF-8 + + doc_ref = context._resolvers.resolve(url, pubid, context) + except: + context._store_raised() + return NULL + + if doc_ref is not None: + if doc_ref._type == PARSER_DATA_STRING: + data = doc_ref._data_bytes + filename = doc_ref._filename + if not filename: + filename = None + elif not isinstance(filename, bytes): + # most likely a text URL + filename = filename.encode('utf8') + if not isinstance(filename, bytes): + filename = None + + c_input = xmlparser.xmlNewInputStream(c_context) + if c_input is not NULL: + if filename is not None: + c_input.filename = <char *>tree.xmlStrdup(_xcstr(filename)) + c_input.base = _xcstr(data) + c_input.length = python.PyBytes_GET_SIZE(data) + c_input.cur = c_input.base + c_input.end = c_input.base + c_input.length + elif doc_ref._type == PARSER_DATA_FILENAME: + data = None + c_filename = _cstr(doc_ref._filename) + with nogil: + # free the GIL as we might do serious I/O here + c_input = xmlparser.xmlNewInputFromFile( + c_context, c_filename) + elif doc_ref._type == PARSER_DATA_FILE: + file_context = _FileReaderContext(doc_ref._file, context, url, + None, doc_ref._close_file) + c_input = file_context._createParserInput(c_context) + data = file_context + else: + data = None + c_input = NULL + + if data is not None: + context._storage.add(data) + if c_input is not NULL: + return c_input + + if __DEFAULT_ENTITY_LOADER is NULL: + return NULL + + with nogil: + # free the GIL as we might do serious I/O here (e.g. HTTP) + c_input = __DEFAULT_ENTITY_LOADER(c_url, c_pubid, c_context) + return c_input + +cdef xmlparser.xmlExternalEntityLoader __DEFAULT_ENTITY_LOADER +__DEFAULT_ENTITY_LOADER = xmlparser.xmlGetExternalEntityLoader() + + +cdef xmlparser.xmlExternalEntityLoader _register_document_loader() noexcept nogil: + cdef xmlparser.xmlExternalEntityLoader old = xmlparser.xmlGetExternalEntityLoader() + xmlparser.xmlSetExternalEntityLoader(<xmlparser.xmlExternalEntityLoader>_local_resolver) + return old + +cdef void _reset_document_loader(xmlparser.xmlExternalEntityLoader old) noexcept nogil: + xmlparser.xmlSetExternalEntityLoader(old) + + +############################################################ +## Parsers +############################################################ + +@cython.no_gc_clear # May have to call "self._validator.disconnect()" on dealloc. +@cython.internal +cdef class _ParserContext(_ResolverContext): + cdef _ErrorLog _error_log + cdef _ParserSchemaValidationContext _validator + cdef xmlparser.xmlParserCtxt* _c_ctxt + cdef xmlparser.xmlExternalEntityLoader _orig_loader + cdef python.PyThread_type_lock _lock + cdef _Document _doc + cdef bint _collect_ids + + def __cinit__(self): + self._c_ctxt = NULL + self._collect_ids = True + if not config.ENABLE_THREADING: + self._lock = NULL + else: + self._lock = python.PyThread_allocate_lock() + self._error_log = _ErrorLog() + + def __dealloc__(self): + if config.ENABLE_THREADING and self._lock is not NULL: + python.PyThread_free_lock(self._lock) + self._lock = NULL + if self._c_ctxt is not NULL: + if <void*>self._validator is not NULL and self._validator is not None: + # If the parser was not closed correctly (e.g. interrupted iterparse()), + # and the schema validator wasn't freed and cleaned up yet, the libxml2 SAX + # validator plug might still be in place, which will make xmlFreeParserCtxt() + # crash when trying to xmlFree() a static SAX handler. + # Thus, make sure we disconnect the handler interceptor here at the latest. + self._validator.disconnect() + xmlparser.xmlFreeParserCtxt(self._c_ctxt) + + cdef _ParserContext _copy(self): + cdef _ParserContext context + context = self.__class__() + context._collect_ids = self._collect_ids + context._validator = self._validator.copy() + _initParserContext(context, self._resolvers._copy(), NULL) + return context + + cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept: + self._c_ctxt = c_ctxt + c_ctxt._private = <void*>self + + cdef void _resetParserContext(self) noexcept: + if self._c_ctxt is not NULL: + if self._c_ctxt.html: + htmlparser.htmlCtxtReset(self._c_ctxt) + self._c_ctxt.disableSAX = 0 # work around bug in libxml2 + else: + xmlparser.xmlClearParserCtxt(self._c_ctxt) + # work around bug in libxml2 [2.9.10 .. 2.9.14]: + # https://gitlab.gnome.org/GNOME/libxml2/-/issues/378 + self._c_ctxt.nsNr = 0 + + cdef int prepare(self, bint set_document_loader=True) except -1: + cdef int result + if config.ENABLE_THREADING and self._lock is not NULL: + with nogil: + result = python.PyThread_acquire_lock( + self._lock, python.WAIT_LOCK) + if result == 0: + raise ParserError, "parser locking failed" + self._error_log.clear() + self._doc = None + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + self._c_ctxt.sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError + self._orig_loader = _register_document_loader() if set_document_loader else NULL + if self._validator is not None: + self._validator.connect(self._c_ctxt, self._error_log) + return 0 + + cdef int cleanup(self) except -1: + if self._orig_loader is not NULL: + _reset_document_loader(self._orig_loader) + try: + if self._validator is not None: + self._validator.disconnect() + self._resetParserContext() + self.clear() + self._doc = None + self._c_ctxt.sax.serror = NULL + finally: + if config.ENABLE_THREADING and self._lock is not NULL: + python.PyThread_release_lock(self._lock) + return 0 + + cdef object _handleParseResult(self, _BaseParser parser, + xmlDoc* result, filename): + c_doc = self._handleParseResultDoc(parser, result, filename) + if self._doc is not None and self._doc._c_doc is c_doc: + return self._doc + else: + return _documentFactory(c_doc, parser) + + cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, + xmlDoc* result, filename) except NULL: + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER + return _handleParseResult(self, self._c_ctxt, result, + filename, recover, + free_doc=self._doc is None) + +cdef _initParserContext(_ParserContext context, + _ResolverRegistry resolvers, + xmlparser.xmlParserCtxt* c_ctxt): + _initResolverContext(context, resolvers) + if c_ctxt is not NULL: + context._initParserContext(c_ctxt) + +cdef void _forwardParserError(xmlparser.xmlParserCtxt* _parser_context, const xmlerror.xmlError* error) noexcept with gil: + (<_ParserContext>_parser_context._private)._error_log._receive(error) + +cdef void _receiveParserError(void* c_context, const xmlerror.xmlError* error) noexcept nogil: + if __DEBUG: + if c_context is NULL or (<xmlparser.xmlParserCtxt*>c_context)._private is NULL: + _forwardError(NULL, error) + else: + _forwardParserError(<xmlparser.xmlParserCtxt*>c_context, error) + +cdef int _raiseParseError(xmlparser.xmlParserCtxt* ctxt, filename, + _ErrorLog error_log) except -1: + if filename is not None and \ + ctxt.lastError.domain == xmlerror.XML_FROM_IO: + if isinstance(filename, bytes): + filename = _decodeFilenameWithLength( + <bytes>filename, len(<bytes>filename)) + if ctxt.lastError.message is not NULL: + try: + message = ctxt.lastError.message.decode('utf-8') + except UnicodeDecodeError: + # the filename may be in there => play it safe + message = ctxt.lastError.message.decode('iso8859-1') + message = f"Error reading file '{filename}': {message.strip()}" + else: + message = f"Error reading '{filename}'" + raise IOError, message + elif error_log: + raise error_log._buildParseException( + XMLSyntaxError, "Document is not well formed") + elif ctxt.lastError.message is not NULL: + message = ctxt.lastError.message.strip() + code = ctxt.lastError.code + line = ctxt.lastError.line + column = ctxt.lastError.int2 + if ctxt.lastError.line > 0: + message = f"line {line}: {message}" + raise XMLSyntaxError(message, code, line, column, filename) + else: + raise XMLSyntaxError(None, xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0, + filename) + +cdef xmlDoc* _handleParseResult(_ParserContext context, + xmlparser.xmlParserCtxt* c_ctxt, + xmlDoc* result, filename, + bint recover, bint free_doc) except NULL: + cdef bint well_formed + if result is not NULL: + __GLOBAL_PARSER_CONTEXT.initDocDict(result) + + if c_ctxt.myDoc is not NULL: + if c_ctxt.myDoc is not result: + __GLOBAL_PARSER_CONTEXT.initDocDict(c_ctxt.myDoc) + tree.xmlFreeDoc(c_ctxt.myDoc) + c_ctxt.myDoc = NULL + + if result is not NULL: + if (context._validator is not None and + not context._validator.isvalid()): + well_formed = 0 # actually not 'valid', but anyway ... + elif (not c_ctxt.wellFormed and not c_ctxt.html and + c_ctxt.charset == tree.XML_CHAR_ENCODING_8859_1 and + [1 for error in context._error_log + if error.type == ErrorTypes.ERR_INVALID_CHAR]): + # An encoding error occurred and libxml2 switched from UTF-8 + # input to (undecoded) Latin-1, at some arbitrary point in the + # document. Better raise an error than allowing for a broken + # tree with mixed encodings. This is fixed in libxml2 2.12. + well_formed = 0 + elif recover or (c_ctxt.wellFormed and + c_ctxt.lastError.level < xmlerror.XML_ERR_ERROR): + well_formed = 1 + elif not c_ctxt.replaceEntities and not c_ctxt.validate \ + and context is not None: + # in this mode, we ignore errors about undefined entities + for error in context._error_log.filter_from_errors(): + if error.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \ + error.type != ErrorTypes.ERR_UNDECLARED_ENTITY: + well_formed = 0 + break + else: + well_formed = 1 + else: + well_formed = 0 + + if not well_formed: + if free_doc: + tree.xmlFreeDoc(result) + result = NULL + + if context is not None and context._has_raised(): + if result is not NULL: + if free_doc: + tree.xmlFreeDoc(result) + result = NULL + context._raise_if_stored() + + if result is NULL: + if context is not None: + _raiseParseError(c_ctxt, filename, context._error_log) + else: + _raiseParseError(c_ctxt, filename, None) + else: + if result.URL is NULL and filename is not None: + result.URL = tree.xmlStrdup(_xcstr(filename)) + if result.encoding is NULL: + result.encoding = tree.xmlStrdup(<unsigned char*>"UTF-8") + + if context._validator is not None and \ + context._validator._add_default_attributes: + # we currently need to do this here as libxml2 does not + # support inserting default attributes during parse-time + # validation + context._validator.inject_default_attributes(result) + + return result + +cdef int _fixHtmlDictNames(tree.xmlDict* c_dict, xmlDoc* c_doc) noexcept nogil: + cdef xmlNode* c_node + if c_doc is NULL: + return 0 + c_node = c_doc.children + tree.BEGIN_FOR_EACH_ELEMENT_FROM(<xmlNode*>c_doc, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + if _fixHtmlDictNodeNames(c_dict, c_node) < 0: + return -1 + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + return 0 + +cdef int _fixHtmlDictSubtreeNames(tree.xmlDict* c_dict, xmlDoc* c_doc, + xmlNode* c_start_node) noexcept nogil: + """ + Move names to the dict, iterating in document order, starting at + c_start_node. This is used in incremental parsing after each chunk. + """ + cdef xmlNode* c_node + if not c_doc: + return 0 + if not c_start_node: + return _fixHtmlDictNames(c_dict, c_doc) + c_node = c_start_node + tree.BEGIN_FOR_EACH_ELEMENT_FROM(<xmlNode*>c_doc, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + if _fixHtmlDictNodeNames(c_dict, c_node) < 0: + return -1 + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + return 0 + +cdef inline int _fixHtmlDictNodeNames(tree.xmlDict* c_dict, + xmlNode* c_node) noexcept nogil: + cdef xmlNode* c_attr + c_name = tree.xmlDictLookup(c_dict, c_node.name, -1) + if c_name is NULL: + return -1 + if c_name is not c_node.name: + tree.xmlFree(<char*>c_node.name) + c_node.name = c_name + c_attr = <xmlNode*>c_node.properties + while c_attr is not NULL: + c_name = tree.xmlDictLookup(c_dict, c_attr.name, -1) + if c_name is NULL: + return -1 + if c_name is not c_attr.name: + tree.xmlFree(<char*>c_attr.name) + c_attr.name = c_name + c_attr = c_attr.next + return 0 + + +@cython.internal +cdef class _BaseParser: + cdef ElementClassLookup _class_lookup + cdef _ResolverRegistry _resolvers + cdef _ParserContext _parser_context + cdef _ParserContext _push_parser_context + cdef int _parse_options + cdef bint _for_html + cdef bint _remove_comments + cdef bint _remove_pis + cdef bint _strip_cdata + cdef bint _collect_ids + cdef bint _resolve_external_entities + cdef XMLSchema _schema + cdef bytes _filename + cdef readonly object target + cdef object _default_encoding + cdef tuple _events_to_collect # (event_types, tag) + + def __init__(self, int parse_options, bint for_html, XMLSchema schema, + remove_comments, remove_pis, strip_cdata, collect_ids, + target, encoding, bint resolve_external_entities=True): + cdef tree.xmlCharEncodingHandler* enchandler + cdef int c_encoding + if not isinstance(self, (XMLParser, HTMLParser)): + raise TypeError, "This class cannot be instantiated" + + self._parse_options = parse_options + self.target = target + self._for_html = for_html + self._remove_comments = remove_comments + self._remove_pis = remove_pis + self._strip_cdata = strip_cdata + self._collect_ids = collect_ids + self._resolve_external_entities = resolve_external_entities + self._schema = schema + + self._resolvers = _ResolverRegistry() + + if encoding is None: + self._default_encoding = None + else: + encoding = _utf8(encoding) + enchandler = tree.xmlFindCharEncodingHandler(_cstr(encoding)) + if enchandler is NULL: + raise LookupError, f"unknown encoding: '{encoding}'" + tree.xmlCharEncCloseFunc(enchandler) + self._default_encoding = encoding + + cdef _setBaseURL(self, base_url): + self._filename = _encodeFilename(base_url) + + cdef _collectEvents(self, event_types, tag): + if event_types is None: + event_types = () + else: + event_types = tuple(set(event_types)) + _buildParseEventFilter(event_types) # purely for validation + self._events_to_collect = (event_types, tag) + + cdef _ParserContext _getParserContext(self): + cdef xmlparser.xmlParserCtxt* pctxt + if self._parser_context is None: + self._parser_context = self._createContext(self.target, None) + self._parser_context._collect_ids = self._collect_ids + if self._schema is not None: + self._parser_context._validator = \ + self._schema._newSaxValidator( + self._parse_options & xmlparser.XML_PARSE_DTDATTR) + pctxt = self._newParserCtxt() + _initParserContext(self._parser_context, self._resolvers, pctxt) + self._configureSaxContext(pctxt) + return self._parser_context + + cdef _ParserContext _getPushParserContext(self): + cdef xmlparser.xmlParserCtxt* pctxt + if self._push_parser_context is None: + self._push_parser_context = self._createContext( + self.target, self._events_to_collect) + self._push_parser_context._collect_ids = self._collect_ids + if self._schema is not None: + self._push_parser_context._validator = \ + self._schema._newSaxValidator( + self._parse_options & xmlparser.XML_PARSE_DTDATTR) + pctxt = self._newPushParserCtxt() + _initParserContext( + self._push_parser_context, self._resolvers, pctxt) + self._configureSaxContext(pctxt) + return self._push_parser_context + + cdef _ParserContext _createContext(self, target, events_to_collect): + cdef _SaxParserContext sax_context + if target is not None: + sax_context = _TargetParserContext(self) + (<_TargetParserContext>sax_context)._setTarget(target) + elif events_to_collect: + sax_context = _SaxParserContext(self) + else: + # nothing special to configure + return _ParserContext() + if events_to_collect: + events, tag = events_to_collect + sax_context._setEventFilter(events, tag) + return sax_context + + @cython.final + cdef int _configureSaxContext(self, xmlparser.xmlParserCtxt* pctxt) except -1: + if self._remove_comments: + pctxt.sax.comment = NULL + if self._remove_pis: + pctxt.sax.processingInstruction = NULL + if self._strip_cdata: + # hard switch-off for CDATA nodes => makes them plain text + pctxt.sax.cdataBlock = NULL + if not self._resolve_external_entities: + pctxt.sax.getEntity = _getInternalEntityOnly + + cdef int _registerHtmlErrorHandler(self, xmlparser.xmlParserCtxt* c_ctxt) except -1: + cdef xmlparser.xmlSAXHandler* sax = c_ctxt.sax + if sax is not NULL and sax.initialized and sax.initialized != xmlparser.XML_SAX2_MAGIC: + # need to extend SAX1 context to SAX2 to get proper error reports + if <xmlparser.xmlSAXHandlerV1*>sax is &htmlparser.htmlDefaultSAXHandler: + sax = <xmlparser.xmlSAXHandler*> tree.xmlMalloc(sizeof(xmlparser.xmlSAXHandler)) + if sax is NULL: + raise MemoryError() + cstring_h.memcpy(sax, &htmlparser.htmlDefaultSAXHandler, + sizeof(htmlparser.htmlDefaultSAXHandler)) + c_ctxt.sax = sax + sax.initialized = xmlparser.XML_SAX2_MAGIC + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError + sax.startElementNs = NULL + sax.endElementNs = NULL + sax._private = NULL + return 0 + + cdef xmlparser.xmlParserCtxt* _newParserCtxt(self) except NULL: + cdef xmlparser.xmlParserCtxt* c_ctxt + if self._for_html: + c_ctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5) + if c_ctxt is not NULL: + self._registerHtmlErrorHandler(c_ctxt) + else: + c_ctxt = xmlparser.xmlNewParserCtxt() + if c_ctxt is NULL: + raise MemoryError + c_ctxt.sax.startDocument = _initSaxDocument + return c_ctxt + + cdef xmlparser.xmlParserCtxt* _newPushParserCtxt(self) except NULL: + cdef xmlparser.xmlParserCtxt* c_ctxt + cdef char* c_filename = _cstr(self._filename) if self._filename is not None else NULL + if self._for_html: + c_ctxt = htmlparser.htmlCreatePushParserCtxt( + NULL, NULL, NULL, 0, c_filename, tree.XML_CHAR_ENCODING_NONE) + if c_ctxt is not NULL: + self._registerHtmlErrorHandler(c_ctxt) + htmlparser.htmlCtxtUseOptions(c_ctxt, self._parse_options) + else: + c_ctxt = xmlparser.xmlCreatePushParserCtxt( + NULL, NULL, NULL, 0, c_filename) + if c_ctxt is not NULL: + xmlparser.xmlCtxtUseOptions(c_ctxt, self._parse_options) + if c_ctxt is NULL: + raise MemoryError() + c_ctxt.sax.startDocument = _initSaxDocument + return c_ctxt + + @property + def error_log(self): + """The error log of the last parser run. + """ + cdef _ParserContext context + context = self._getParserContext() + return context._error_log.copy() + + @property + def resolvers(self): + """The custom resolver registry of this parser.""" + return self._resolvers + + @property + def version(self): + """The version of the underlying XML parser.""" + return "libxml2 %d.%d.%d" % LIBXML_VERSION + + def set_element_class_lookup(self, ElementClassLookup lookup = None): + """set_element_class_lookup(self, lookup = None) + + Set a lookup scheme for element classes generated from this parser. + + Reset it by passing None or nothing. + """ + self._class_lookup = lookup + + cdef _BaseParser _copy(self): + "Create a new parser with the same configuration." + cdef _BaseParser parser + parser = self.__class__() + parser._parse_options = self._parse_options + parser._for_html = self._for_html + parser._remove_comments = self._remove_comments + parser._remove_pis = self._remove_pis + parser._strip_cdata = self._strip_cdata + parser._filename = self._filename + parser._resolvers = self._resolvers + parser.target = self.target + parser._class_lookup = self._class_lookup + parser._default_encoding = self._default_encoding + parser._schema = self._schema + parser._events_to_collect = self._events_to_collect + return parser + + def copy(self): + """copy(self) + + Create a new parser with the same configuration. + """ + return self._copy() + + def makeelement(self, _tag, attrib=None, nsmap=None, **_extra): + """makeelement(self, _tag, attrib=None, nsmap=None, **_extra) + + Creates a new element associated with this parser. + """ + return _makeElement(_tag, NULL, None, self, None, None, + attrib, nsmap, _extra) + + # internal parser methods + + cdef xmlDoc* _parseUnicodeDoc(self, utext, char* c_filename) except NULL: + """Parse unicode document, share dictionary if possible. + """ + cdef _ParserContext context + cdef xmlDoc* result + cdef xmlparser.xmlParserCtxt* pctxt + cdef Py_ssize_t py_buffer_len + cdef int buffer_len, c_kind + cdef const_char* c_text + cdef const_char* c_encoding = _PY_UNICODE_ENCODING + if python.PyUnicode_IS_READY(utext): + # PEP-393 string + c_text = <const_char*>python.PyUnicode_DATA(utext) + py_buffer_len = python.PyUnicode_GET_LENGTH(utext) + c_kind = python.PyUnicode_KIND(utext) + if c_kind == 1: + if python.PyUnicode_MAX_CHAR_VALUE(utext) <= 127: + c_encoding = 'UTF-8' + else: + c_encoding = 'ISO-8859-1' + elif c_kind == 2: + py_buffer_len *= 2 + if python.PY_BIG_ENDIAN: + c_encoding = 'UTF-16BE' # actually UCS-2 + else: + c_encoding = 'UTF-16LE' # actually UCS-2 + elif c_kind == 4: + py_buffer_len *= 4 + if python.PY_BIG_ENDIAN: + c_encoding = 'UTF-32BE' # actually UCS-4 + else: + c_encoding = 'UTF-32LE' # actually UCS-4 + else: + assert False, f"Illegal Unicode kind {c_kind}" + else: + # old Py_UNICODE string + py_buffer_len = python.PyUnicode_GET_DATA_SIZE(utext) + c_text = python.PyUnicode_AS_DATA(utext) + assert 0 <= py_buffer_len <= limits.INT_MAX + buffer_len = py_buffer_len + + context = self._getParserContext() + context.prepare() + try: + pctxt = context._c_ctxt + __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) + orig_options = pctxt.options + with nogil: + if self._for_html: + result = htmlparser.htmlCtxtReadMemory( + pctxt, c_text, buffer_len, c_filename, c_encoding, + self._parse_options) + if result is not NULL: + if _fixHtmlDictNames(pctxt.dict, result) < 0: + tree.xmlFreeDoc(result) + result = NULL + else: + result = xmlparser.xmlCtxtReadMemory( + pctxt, c_text, buffer_len, c_filename, c_encoding, + self._parse_options) + pctxt.options = orig_options # work around libxml2 problem + + return context._handleParseResultDoc(self, result, None) + finally: + context.cleanup() + + cdef xmlDoc* _parseDoc(self, char* c_text, int c_len, + char* c_filename) except NULL: + """Parse document, share dictionary if possible. + """ + cdef _ParserContext context + cdef xmlDoc* result + cdef xmlparser.xmlParserCtxt* pctxt + cdef char* c_encoding + cdef tree.xmlCharEncoding enc + context = self._getParserContext() + context.prepare() + try: + pctxt = context._c_ctxt + __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) + + if self._default_encoding is None: + c_encoding = NULL + # libxml2 (at least 2.9.3) does not recognise UTF-32 BOMs + # NOTE: limit to problematic cases because it changes character offsets + if c_len >= 4 and (c_text[0] == b'\xFF' and c_text[1] == b'\xFE' and + c_text[2] == 0 and c_text[3] == 0): + c_encoding = "UTF-32LE" + c_text += 4 + c_len -= 4 + elif c_len >= 4 and (c_text[0] == 0 and c_text[1] == 0 and + c_text[2] == b'\xFE' and c_text[3] == b'\xFF'): + c_encoding = "UTF-32BE" + c_text += 4 + c_len -= 4 + else: + # no BOM => try to determine encoding + enc = tree.xmlDetectCharEncoding(<const_xmlChar*>c_text, c_len) + if enc == tree.XML_CHAR_ENCODING_UCS4LE: + c_encoding = 'UTF-32LE' + elif enc == tree.XML_CHAR_ENCODING_UCS4BE: + c_encoding = 'UTF-32BE' + else: + c_encoding = _cstr(self._default_encoding) + + orig_options = pctxt.options + with nogil: + if self._for_html: + result = htmlparser.htmlCtxtReadMemory( + pctxt, c_text, c_len, c_filename, + c_encoding, self._parse_options) + if result is not NULL: + if _fixHtmlDictNames(pctxt.dict, result) < 0: + tree.xmlFreeDoc(result) + result = NULL + else: + result = xmlparser.xmlCtxtReadMemory( + pctxt, c_text, c_len, c_filename, + c_encoding, self._parse_options) + pctxt.options = orig_options # work around libxml2 problem + + return context._handleParseResultDoc(self, result, None) + finally: + context.cleanup() + + cdef xmlDoc* _parseDocFromFile(self, char* c_filename) except NULL: + cdef _ParserContext context + cdef xmlDoc* result + cdef xmlparser.xmlParserCtxt* pctxt + cdef char* c_encoding + result = NULL + + context = self._getParserContext() + context.prepare() + try: + pctxt = context._c_ctxt + __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) + + if self._default_encoding is None: + c_encoding = NULL + else: + c_encoding = _cstr(self._default_encoding) + + orig_options = pctxt.options + with nogil: + if self._for_html: + result = htmlparser.htmlCtxtReadFile( + pctxt, c_filename, c_encoding, self._parse_options) + if result is not NULL: + if _fixHtmlDictNames(pctxt.dict, result) < 0: + tree.xmlFreeDoc(result) + result = NULL + else: + result = xmlparser.xmlCtxtReadFile( + pctxt, c_filename, c_encoding, self._parse_options) + pctxt.options = orig_options # work around libxml2 problem + + return context._handleParseResultDoc(self, result, c_filename) + finally: + context.cleanup() + + cdef xmlDoc* _parseDocFromFilelike(self, filelike, filename, + encoding) except NULL: + cdef _ParserContext context + cdef _FileReaderContext file_context + cdef xmlDoc* result + cdef xmlparser.xmlParserCtxt* pctxt + cdef char* c_filename + if not filename: + filename = None + + context = self._getParserContext() + context.prepare() + try: + pctxt = context._c_ctxt + __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) + file_context = _FileReaderContext( + filelike, context, filename, + encoding or self._default_encoding) + result = file_context._readDoc(pctxt, self._parse_options) + + return context._handleParseResultDoc( + self, result, filename) + finally: + context.cleanup() + + +cdef tree.xmlEntity* _getInternalEntityOnly(void* ctxt, const_xmlChar* name) noexcept nogil: + """ + Callback function to intercept the entity resolution when external entity loading is disabled. + """ + cdef tree.xmlEntity* entity = xmlparser.xmlSAX2GetEntity(ctxt, name) + if not entity: + return NULL + if entity.etype not in ( + tree.xmlEntityType.XML_EXTERNAL_GENERAL_PARSED_ENTITY, + tree.xmlEntityType.XML_EXTERNAL_GENERAL_UNPARSED_ENTITY, + tree.xmlEntityType.XML_EXTERNAL_PARAMETER_ENTITY): + return entity + + # Reject all external entities and fail the parsing instead. There is currently + # no way in libxml2 to just prevent the entity resolution in this case. + cdef xmlerror.xmlError c_error + cdef xmlerror.xmlStructuredErrorFunc err_func + cdef xmlparser.xmlParserInput* parser_input + cdef void* err_context + + c_ctxt = <xmlparser.xmlParserCtxt *> ctxt + err_func = xmlerror.xmlStructuredError + if err_func: + parser_input = c_ctxt.input + # Copied from xmlVErrParser() in libxml2: get current input from stack. + if parser_input and parser_input.filename is NULL and c_ctxt.inputNr > 1: + parser_input = c_ctxt.inputTab[c_ctxt.inputNr - 2] + + c_error = xmlerror.xmlError( + domain=xmlerror.xmlErrorDomain.XML_FROM_PARSER, + code=xmlerror.xmlParserErrors.XML_ERR_EXT_ENTITY_STANDALONE, + level=xmlerror.xmlErrorLevel.XML_ERR_FATAL, + message=b"External entity resolution is disabled for security reasons " + b"when resolving '&%s;'. Use 'XMLParser(resolve_entities=True)' " + b"if you consider it safe to enable it.", + file=parser_input.filename, + node=entity, + str1=<char*> name, + str2=NULL, + str3=NULL, + line=parser_input.line if parser_input else 0, + int1=0, + int2=parser_input.col if parser_input else 0, + ) + err_context = xmlerror.xmlStructuredErrorContext + err_func(err_context, &c_error) + + c_ctxt.wellFormed = 0 + # The entity was looked up and does not need to be freed. + return NULL + + +cdef void _initSaxDocument(void* ctxt) noexcept with gil: + xmlparser.xmlSAX2StartDocument(ctxt) + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + c_doc = c_ctxt.myDoc + + # set up document dict + if c_doc and c_ctxt.dict and not c_doc.dict: + # I have no idea why libxml2 disables this - we need it + c_ctxt.dictNames = 1 + c_doc.dict = c_ctxt.dict + xmlparser.xmlDictReference(c_ctxt.dict) + + # set up XML ID hash table + if c_ctxt._private: + context = <_ParserContext>c_ctxt._private + if context._collect_ids: + # keep the global parser dict from filling up with XML IDs + if c_doc and not c_doc.ids: + # memory errors are not fatal here + c_dict = xmlparser.xmlDictCreate() + if c_dict: + c_doc.ids = tree.xmlHashCreateDict(0, c_dict) + xmlparser.xmlDictFree(c_dict) + else: + c_doc.ids = tree.xmlHashCreate(0) + else: + c_ctxt.loadsubset |= xmlparser.XML_SKIP_IDS + if c_doc and c_doc.ids and not tree.xmlHashSize(c_doc.ids): + # already initialised but empty => clear + tree.xmlHashFree(c_doc.ids, NULL) + c_doc.ids = NULL + + +############################################################ +## ET feed parser +############################################################ + +cdef class _FeedParser(_BaseParser): + cdef bint _feed_parser_running + + @property + def feed_error_log(self): + """The error log of the last (or current) run of the feed parser. + + Note that this is local to the feed parser and thus is + different from what the ``error_log`` property returns. + """ + return self._getPushParserContext()._error_log.copy() + + cpdef feed(self, data): + """feed(self, data) + + Feeds data to the parser. The argument should be an 8-bit string + buffer containing encoded data, although Unicode is supported as long + as both string types are not mixed. + + This is the main entry point to the consumer interface of a + parser. The parser will parse as much of the XML stream as it + can on each call. To finish parsing or to reset the parser, + call the ``close()`` method. Both methods may raise + ParseError if errors occur in the input data. If an error is + raised, there is no longer a need to call ``close()``. + + The feed parser interface is independent of the normal parser + usage. You can use the same parser as a feed parser and in + the ``parse()`` function concurrently. + """ + cdef _ParserContext context + cdef bytes bstring + cdef xmlparser.xmlParserCtxt* pctxt + cdef Py_ssize_t py_buffer_len, ustart + cdef const_char* char_data + cdef const_char* c_encoding + cdef int buffer_len + cdef int error + cdef bint recover = self._parse_options & xmlparser.XML_PARSE_RECOVER + + if isinstance(data, bytes): + if self._default_encoding is None: + c_encoding = NULL + else: + c_encoding = self._default_encoding + char_data = _cstr(data) + py_buffer_len = python.PyBytes_GET_SIZE(data) + ustart = 0 + elif isinstance(data, unicode): + c_encoding = b"UTF-8" + char_data = NULL + py_buffer_len = len(<unicode> data) + ustart = 0 + else: + raise TypeError, "Parsing requires string data" + + context = self._getPushParserContext() + pctxt = context._c_ctxt + error = 0 + if not self._feed_parser_running: + context.prepare(set_document_loader=False) + self._feed_parser_running = 1 + c_filename = (_cstr(self._filename) + if self._filename is not None else NULL) + + # We have to give *mlCtxtResetPush() enough input to figure + # out the character encoding (at least four bytes), + # however if we give it all we got, we'll have nothing for + # *mlParseChunk() and things go wrong. + buffer_len = 0 + if char_data is not NULL: + buffer_len = 4 if py_buffer_len > 4 else <int>py_buffer_len + orig_loader = _register_document_loader() + if self._for_html: + error = _htmlCtxtResetPush( + pctxt, char_data, buffer_len, c_filename, c_encoding, + self._parse_options) + else: + xmlparser.xmlCtxtUseOptions(pctxt, self._parse_options) + error = xmlparser.xmlCtxtResetPush( + pctxt, char_data, buffer_len, c_filename, c_encoding) + _reset_document_loader(orig_loader) + py_buffer_len -= buffer_len + char_data += buffer_len + if error: + raise MemoryError() + __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt) + + #print pctxt.charset, 'NONE' if c_encoding is NULL else c_encoding + + fixup_error = 0 + while py_buffer_len > 0 and (error == 0 or recover): + if char_data is NULL: + # Unicode parsing by converting chunks to UTF-8 + buffer_len = 2**19 # len(bytes) <= 4 * (2**19) == 2 MiB + bstring = (<unicode> data)[ustart : ustart+buffer_len].encode('UTF-8') + ustart += buffer_len + py_buffer_len -= buffer_len # may end up < 0 + error, fixup_error = _parse_data_chunk(pctxt, <const char*> bstring, <int> len(bstring)) + else: + # Direct byte string parsing. + buffer_len = <int>py_buffer_len if py_buffer_len <= limits.INT_MAX else limits.INT_MAX + error, fixup_error = _parse_data_chunk(pctxt, char_data, buffer_len) + py_buffer_len -= buffer_len + char_data += buffer_len + + if fixup_error: + context.store_exception(MemoryError()) + + if context._has_raised(): + # propagate Python exceptions immediately + recover = 0 + error = 1 + break + + if error and not pctxt.replaceEntities and not pctxt.validate: + # in this mode, we ignore errors about undefined entities + for entry in context._error_log.filter_from_errors(): + if entry.type != ErrorTypes.WAR_UNDECLARED_ENTITY and \ + entry.type != ErrorTypes.ERR_UNDECLARED_ENTITY: + break + else: + error = 0 + + if not pctxt.wellFormed and pctxt.disableSAX and context._has_raised(): + # propagate Python exceptions immediately + recover = 0 + error = 1 + + if fixup_error or not recover and (error or not pctxt.wellFormed): + self._feed_parser_running = 0 + try: + context._handleParseResult(self, pctxt.myDoc, None) + finally: + context.cleanup() + + cpdef close(self): + """close(self) + + Terminates feeding data to this parser. This tells the parser to + process any remaining data in the feed buffer, and then returns the + root Element of the tree that was parsed. + + This method must be called after passing the last chunk of data into + the ``feed()`` method. It should only be called when using the feed + parser interface, all other usage is undefined. + """ + if not self._feed_parser_running: + raise XMLSyntaxError("no element found", + xmlerror.XML_ERR_INTERNAL_ERROR, 0, 0, + self._filename) + + context = self._getPushParserContext() + pctxt = context._c_ctxt + + self._feed_parser_running = 0 + if self._for_html: + htmlparser.htmlParseChunk(pctxt, NULL, 0, 1) + else: + xmlparser.xmlParseChunk(pctxt, NULL, 0, 1) + + if (pctxt.recovery and not pctxt.disableSAX and + isinstance(context, _SaxParserContext)): + # apply any left-over 'end' events + (<_SaxParserContext>context).flushEvents() + + try: + result = context._handleParseResult(self, pctxt.myDoc, None) + finally: + context.cleanup() + + if isinstance(result, _Document): + return (<_Document>result).getroot() + else: + return result + + +cdef (int, int) _parse_data_chunk(xmlparser.xmlParserCtxt* c_ctxt, + const char* char_data, int buffer_len): + fixup_error = 0 + with nogil: + if c_ctxt.html: + c_node = c_ctxt.node # last node where the parser stopped + orig_loader = _register_document_loader() + error = htmlparser.htmlParseChunk(c_ctxt, char_data, buffer_len, 0) + _reset_document_loader(orig_loader) + # and now for the fun part: move node names to the dict + if c_ctxt.myDoc: + fixup_error = _fixHtmlDictSubtreeNames( + c_ctxt.dict, c_ctxt.myDoc, c_node) + if c_ctxt.myDoc.dict and c_ctxt.myDoc.dict is not c_ctxt.dict: + xmlparser.xmlDictFree(c_ctxt.myDoc.dict) + c_ctxt.myDoc.dict = c_ctxt.dict + xmlparser.xmlDictReference(c_ctxt.dict) + else: + orig_loader = _register_document_loader() + error = xmlparser.xmlParseChunk(c_ctxt, char_data, buffer_len, 0) + _reset_document_loader(orig_loader) + return (error, fixup_error) + + +cdef int _htmlCtxtResetPush(xmlparser.xmlParserCtxt* c_ctxt, + const_char* c_data, int buffer_len, + const_char* c_filename, const_char* c_encoding, + int parse_options) except -1: + cdef xmlparser.xmlParserInput* c_input_stream + # libxml2 lacks an HTML push parser setup function + error = xmlparser.xmlCtxtResetPush( + c_ctxt, c_data, buffer_len, c_filename, c_encoding) + if error: + return error + + # fix libxml2 setup for HTML + c_ctxt.progressive = 1 + c_ctxt.html = 1 + htmlparser.htmlCtxtUseOptions(c_ctxt, parse_options) + + return 0 + + +############################################################ +## XML parser +############################################################ + +cdef int _XML_DEFAULT_PARSE_OPTIONS +_XML_DEFAULT_PARSE_OPTIONS = ( + xmlparser.XML_PARSE_NOENT | + xmlparser.XML_PARSE_NOCDATA | + xmlparser.XML_PARSE_NONET | + xmlparser.XML_PARSE_COMPACT | + xmlparser.XML_PARSE_BIG_LINES + ) + +cdef class XMLParser(_FeedParser): + """XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, schema: XMLSchema =None, huge_tree=False, remove_blank_text=False, resolve_entities=True, remove_comments=False, remove_pis=False, strip_cdata=True, collect_ids=True, target=None, compact=True) + + The XML parser. + + Parsers can be supplied as additional argument to various parse + functions of the lxml API. A default parser is always available + and can be replaced by a call to the global function + 'set_default_parser'. New parsers can be created at any time + without a major run-time overhead. + + The keyword arguments in the constructor are mainly based on the + libxml2 parser configuration. A DTD will also be loaded if DTD + validation or attribute default values are requested (unless you + additionally provide an XMLSchema from which the default + attributes can be read). + + Available boolean keyword arguments: + + - attribute_defaults - inject default attributes from DTD or XMLSchema + - dtd_validation - validate against a DTD referenced by the document + - load_dtd - use DTD for parsing + - no_network - prevent network access for related files (default: True) + - ns_clean - clean up redundant namespace declarations + - recover - try hard to parse through broken XML + - remove_blank_text - discard blank text nodes that appear ignorable + - remove_comments - discard comments + - remove_pis - discard processing instructions + - strip_cdata - replace CDATA sections by normal text content (default: True) + - compact - save memory for short text content (default: True) + - collect_ids - use a hash table of XML IDs for fast access (default: True, always True with DTD validation) + - huge_tree - disable security restrictions and support very deep trees + and very long text content (only affects libxml2 2.7+) + + Other keyword arguments: + + - resolve_entities - replace entities by their text value: False for keeping the + entity references, True for resolving them, and 'internal' for resolving + internal definitions only (no external file/URL access). + The default used to be True and was changed to 'internal' in lxml 5.0. + - encoding - override the document encoding (note: libiconv encoding name) + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against + + Note that you should avoid sharing parsers between threads. While this is + not harmful, it is more efficient to use separate parsers. This does not + apply to the default parser. + """ + def __init__(self, *, encoding=None, attribute_defaults=False, + dtd_validation=False, load_dtd=False, no_network=True, + ns_clean=False, recover=False, XMLSchema schema=None, + huge_tree=False, remove_blank_text=False, resolve_entities='internal', + remove_comments=False, remove_pis=False, strip_cdata=True, + collect_ids=True, target=None, compact=True): + cdef int parse_options + cdef bint resolve_external = True + parse_options = _XML_DEFAULT_PARSE_OPTIONS + if load_dtd: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD + if dtd_validation: + parse_options = parse_options | xmlparser.XML_PARSE_DTDVALID | \ + xmlparser.XML_PARSE_DTDLOAD + if attribute_defaults: + parse_options = parse_options | xmlparser.XML_PARSE_DTDATTR + if schema is None: + parse_options = parse_options | xmlparser.XML_PARSE_DTDLOAD + if ns_clean: + parse_options = parse_options | xmlparser.XML_PARSE_NSCLEAN + if recover: + parse_options = parse_options | xmlparser.XML_PARSE_RECOVER + if remove_blank_text: + parse_options = parse_options | xmlparser.XML_PARSE_NOBLANKS + if huge_tree: + parse_options = parse_options | xmlparser.XML_PARSE_HUGE + if not no_network: + parse_options = parse_options ^ xmlparser.XML_PARSE_NONET + if not compact: + parse_options = parse_options ^ xmlparser.XML_PARSE_COMPACT + if not resolve_entities: + parse_options = parse_options ^ xmlparser.XML_PARSE_NOENT + elif resolve_entities == 'internal': + resolve_external = False + if not strip_cdata: + parse_options = parse_options ^ xmlparser.XML_PARSE_NOCDATA + + _BaseParser.__init__(self, parse_options, False, schema, + remove_comments, remove_pis, strip_cdata, + collect_ids, target, encoding, resolve_external) + + +cdef class XMLPullParser(XMLParser): + """XMLPullParser(self, events=None, *, tag=None, **kwargs) + + XML parser that collects parse events in an iterator. + + The collected events are the same as for iterparse(), but the + parser itself is non-blocking in the sense that it receives + data chunks incrementally through its .feed() method, instead + of reading them directly from a file(-like) object all by itself. + + By default, it collects Element end events. To change that, + pass any subset of the available events into the ``events`` + argument: ``'start'``, ``'end'``, ``'start-ns'``, + ``'end-ns'``, ``'comment'``, ``'pi'``. + + To support loading external dependencies relative to the input + source, you can pass the ``base_url``. + """ + def __init__(self, events=None, *, tag=None, base_url=None, **kwargs): + XMLParser.__init__(self, **kwargs) + if events is None: + events = ('end',) + self._setBaseURL(base_url) + self._collectEvents(events, tag) + + def read_events(self): + return (<_SaxParserContext?>self._getPushParserContext()).events_iterator + + +cdef class ETCompatXMLParser(XMLParser): + """ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \ + dtd_validation=False, load_dtd=False, no_network=True, \ + ns_clean=False, recover=False, schema=None, \ + huge_tree=False, remove_blank_text=False, resolve_entities=True, \ + remove_comments=True, remove_pis=True, strip_cdata=True, \ + target=None, compact=True) + + An XML parser with an ElementTree compatible default setup. + + See the XMLParser class for details. + + This parser has ``remove_comments`` and ``remove_pis`` enabled by default + and thus ignores comments and processing instructions. + """ + def __init__(self, *, encoding=None, attribute_defaults=False, + dtd_validation=False, load_dtd=False, no_network=True, + ns_clean=False, recover=False, schema=None, + huge_tree=False, remove_blank_text=False, resolve_entities=True, + remove_comments=True, remove_pis=True, strip_cdata=True, + target=None, compact=True): + XMLParser.__init__(self, + attribute_defaults=attribute_defaults, + dtd_validation=dtd_validation, + load_dtd=load_dtd, + no_network=no_network, + ns_clean=ns_clean, + recover=recover, + remove_blank_text=remove_blank_text, + huge_tree=huge_tree, + compact=compact, + resolve_entities=resolve_entities, + remove_comments=remove_comments, + remove_pis=remove_pis, + strip_cdata=strip_cdata, + target=target, + encoding=encoding, + schema=schema) + +# ET 1.2 compatible name +XMLTreeBuilder = ETCompatXMLParser + + +cdef XMLParser __DEFAULT_XML_PARSER +__DEFAULT_XML_PARSER = XMLParser() + +__GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER) + +def set_default_parser(_BaseParser parser=None): + """set_default_parser(parser=None) + + Set a default parser for the current thread. This parser is used + globally whenever no parser is supplied to the various parse functions of + the lxml API. If this function is called without a parser (or if it is + None), the default parser is reset to the original configuration. + + Note that the pre-installed default parser is not thread-safe. Avoid the + default parser in multi-threaded environments. You can create a separate + parser for each thread explicitly or use a parser pool. + """ + if parser is None: + parser = __DEFAULT_XML_PARSER + __GLOBAL_PARSER_CONTEXT.setDefaultParser(parser) + +def get_default_parser(): + "get_default_parser()" + return __GLOBAL_PARSER_CONTEXT.getDefaultParser() + +############################################################ +## HTML parser +############################################################ + +cdef int _HTML_DEFAULT_PARSE_OPTIONS +_HTML_DEFAULT_PARSE_OPTIONS = ( + htmlparser.HTML_PARSE_RECOVER | + htmlparser.HTML_PARSE_NONET | + htmlparser.HTML_PARSE_COMPACT + ) + +cdef object _UNUSED = object() + +cdef class HTMLParser(_FeedParser): + """HTMLParser(self, encoding=None, remove_blank_text=False, \ + remove_comments=False, remove_pis=False, \ + no_network=True, target=None, schema: XMLSchema =None, \ + recover=True, compact=True, collect_ids=True, huge_tree=False) + + The HTML parser. + + This parser allows reading HTML into a normal XML tree. By + default, it can read broken (non well-formed) HTML, depending on + the capabilities of libxml2. Use the 'recover' option to switch + this off. + + Available boolean keyword arguments: + + - recover - try hard to parse through broken HTML (default: True) + - no_network - prevent network access for related files (default: True) + - remove_blank_text - discard empty text nodes that are ignorable (i.e. not actual text content) + - remove_comments - discard comments + - remove_pis - discard processing instructions + - compact - save memory for short text content (default: True) + - default_doctype - add a default doctype even if it is not found in the HTML (default: True) + - collect_ids - use a hash table of XML IDs for fast access (default: True) + - huge_tree - disable security restrictions and support very deep trees + and very long text content (only affects libxml2 2.7+) + + Other keyword arguments: + + - encoding - override the document encoding (note: libiconv encoding name) + - target - a parser target object that will receive the parse events + - schema - an XMLSchema to validate against + + Note that you should avoid sharing parsers between threads for performance + reasons. + """ + def __init__(self, *, encoding=None, remove_blank_text=False, + remove_comments=False, remove_pis=False, strip_cdata=_UNUSED, + no_network=True, target=None, XMLSchema schema=None, + recover=True, compact=True, default_doctype=True, + collect_ids=True, huge_tree=False): + cdef int parse_options + parse_options = _HTML_DEFAULT_PARSE_OPTIONS + if remove_blank_text: + parse_options = parse_options | htmlparser.HTML_PARSE_NOBLANKS + if not recover: + parse_options = parse_options ^ htmlparser.HTML_PARSE_RECOVER + if not no_network: + parse_options = parse_options ^ htmlparser.HTML_PARSE_NONET + if not compact: + parse_options = parse_options ^ htmlparser.HTML_PARSE_COMPACT + if not default_doctype: + parse_options = parse_options ^ htmlparser.HTML_PARSE_NODEFDTD + if huge_tree: + parse_options = parse_options | xmlparser.XML_PARSE_HUGE + + if strip_cdata is not _UNUSED: + import warnings + warnings.warn( + "The 'strip_cdata' option of HTMLParser() has never done anything and will eventually be removed.", + DeprecationWarning) + _BaseParser.__init__(self, parse_options, True, schema, + remove_comments, remove_pis, strip_cdata, + collect_ids, target, encoding) + + +cdef HTMLParser __DEFAULT_HTML_PARSER +__DEFAULT_HTML_PARSER = HTMLParser() + + +cdef class HTMLPullParser(HTMLParser): + """HTMLPullParser(self, events=None, *, tag=None, base_url=None, **kwargs) + + HTML parser that collects parse events in an iterator. + + The collected events are the same as for iterparse(), but the + parser itself is non-blocking in the sense that it receives + data chunks incrementally through its .feed() method, instead + of reading them directly from a file(-like) object all by itself. + + By default, it collects Element end events. To change that, + pass any subset of the available events into the ``events`` + argument: ``'start'``, ``'end'``, ``'start-ns'``, + ``'end-ns'``, ``'comment'``, ``'pi'``. + + To support loading external dependencies relative to the input + source, you can pass the ``base_url``. + """ + def __init__(self, events=None, *, tag=None, base_url=None, **kwargs): + HTMLParser.__init__(self, **kwargs) + if events is None: + events = ('end',) + self._setBaseURL(base_url) + self._collectEvents(events, tag) + + def read_events(self): + return (<_SaxParserContext?>self._getPushParserContext()).events_iterator + + +############################################################ +## helper functions for document creation +############################################################ + +cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL: + cdef char* c_filename + cdef char* c_text + cdef Py_ssize_t c_len + if parser is None: + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + if not filename: + c_filename = NULL + else: + filename_utf = _encodeFilenameUTF8(filename) + c_filename = _cstr(filename_utf) + if isinstance(text, unicode): + if python.PyUnicode_IS_READY(text): + # PEP-393 Unicode string + c_len = python.PyUnicode_GET_LENGTH(text) * python.PyUnicode_KIND(text) + else: + # old Py_UNICODE string + c_len = python.PyUnicode_GET_DATA_SIZE(text) + if c_len > limits.INT_MAX: + return (<_BaseParser>parser)._parseDocFromFilelike( + StringIO(text), filename, None) + return (<_BaseParser>parser)._parseUnicodeDoc(text, c_filename) + else: + c_len = python.PyBytes_GET_SIZE(text) + if c_len > limits.INT_MAX: + return (<_BaseParser>parser)._parseDocFromFilelike( + BytesIO(text), filename, None) + c_text = _cstr(text) + return (<_BaseParser>parser)._parseDoc(c_text, c_len, c_filename) + +cdef xmlDoc* _parseDocFromFile(filename8, _BaseParser parser) except NULL: + if parser is None: + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename8)) + +cdef xmlDoc* _parseDocFromFilelike(source, filename, + _BaseParser parser) except NULL: + if parser is None: + parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser() + return (<_BaseParser>parser)._parseDocFromFilelike(source, filename, None) + +cdef xmlDoc* _newXMLDoc() except NULL: + cdef xmlDoc* result + result = tree.xmlNewDoc(NULL) + if result is NULL: + raise MemoryError() + if result.encoding is NULL: + result.encoding = tree.xmlStrdup(<unsigned char*>"UTF-8") + __GLOBAL_PARSER_CONTEXT.initDocDict(result) + return result + +cdef xmlDoc* _newHTMLDoc() except NULL: + cdef xmlDoc* result + result = tree.htmlNewDoc(NULL, NULL) + if result is NULL: + raise MemoryError() + __GLOBAL_PARSER_CONTEXT.initDocDict(result) + return result + +cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive) except NULL: + cdef xmlDoc* result + if recursive: + with nogil: + result = tree.xmlCopyDoc(c_doc, recursive) + else: + result = tree.xmlCopyDoc(c_doc, 0) + if result is NULL: + raise MemoryError() + __GLOBAL_PARSER_CONTEXT.initDocDict(result) + return result + +cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root) except NULL: + "Recursively copy the document and make c_new_root the new root node." + cdef xmlDoc* result + cdef xmlNode* c_node + result = tree.xmlCopyDoc(c_doc, 0) # non recursive + __GLOBAL_PARSER_CONTEXT.initDocDict(result) + with nogil: + c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive + if c_node is NULL: + raise MemoryError() + tree.xmlDocSetRootElement(result, c_node) + _copyTail(c_new_root.next, c_node) + return result + +cdef xmlNode* _copyNodeToDoc(xmlNode* c_node, xmlDoc* c_doc) except NULL: + "Recursively copy the element into the document. c_doc is not modified." + cdef xmlNode* c_root + c_root = tree.xmlDocCopyNode(c_node, c_doc, 1) # recursive + if c_root is NULL: + raise MemoryError() + _copyTail(c_node.next, c_root) + return c_root + + +############################################################ +## API level helper functions for _Document creation +############################################################ + +cdef _Document _parseDocument(source, _BaseParser parser, base_url): + cdef _Document doc + source = _getFSPathOrObject(source) + if _isString(source): + # parse the file directly from the filesystem + doc = _parseDocumentFromURL(_encodeFilename(source), parser) + # fix base URL if requested + if base_url is not None: + base_url = _encodeFilenameUTF8(base_url) + if doc._c_doc.URL is not NULL: + tree.xmlFree(<char*>doc._c_doc.URL) + doc._c_doc.URL = tree.xmlStrdup(_xcstr(base_url)) + return doc + + if base_url is not None: + url = base_url + else: + url = _getFilenameForFile(source) + + if hasattr(source, 'getvalue') and hasattr(source, 'tell'): + # StringIO - reading from start? + if source.tell() == 0: + return _parseMemoryDocument(source.getvalue(), url, parser) + + # Support for file-like objects (urlgrabber.urlopen, ...) + if hasattr(source, 'read'): + return _parseFilelikeDocument(source, url, parser) + + raise TypeError, f"cannot parse from '{python._fqtypename(source).decode('UTF-8')}'" + +cdef _Document _parseDocumentFromURL(url, _BaseParser parser): + c_doc = _parseDocFromFile(url, parser) + return _documentFactory(c_doc, parser) + +cdef _Document _parseMemoryDocument(text, url, _BaseParser parser): + if isinstance(text, unicode): + if _hasEncodingDeclaration(text): + raise ValueError( + "Unicode strings with encoding declaration are not supported. " + "Please use bytes input or XML fragments without declaration.") + elif not isinstance(text, bytes): + raise ValueError, "can only parse strings" + c_doc = _parseDoc(text, url, parser) + return _documentFactory(c_doc, parser) + +cdef _Document _parseFilelikeDocument(source, url, _BaseParser parser): + c_doc = _parseDocFromFilelike(source, url, parser) + return _documentFactory(c_doc, parser) diff --git a/.venv/lib/python3.12/site-packages/lxml/parsertarget.pxi b/.venv/lib/python3.12/site-packages/lxml/parsertarget.pxi new file mode 100644 index 00000000..37c29957 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/parsertarget.pxi @@ -0,0 +1,180 @@ +# Parser target context (ET target interface) + +cdef object inspect_getargspec +try: + from inspect import getfullargspec as inspect_getargspec +except ImportError: + from inspect import getargspec as inspect_getargspec + + +class _TargetParserResult(Exception): + # Admittedly, this is somewhat ugly, but it's the easiest way + # to push the Python level parser result through the parser + # machinery towards the API level functions + def __init__(self, result): + self.result = result + + +@cython.final +@cython.internal +cdef class _PythonSaxParserTarget(_SaxParserTarget): + cdef object _target_start + cdef object _target_end + cdef object _target_data + cdef object _target_start_ns + cdef object _target_end_ns + cdef object _target_doctype + cdef object _target_pi + cdef object _target_comment + cdef bint _start_takes_nsmap + + def __cinit__(self, target): + cdef int event_filter + event_filter = 0 + self._start_takes_nsmap = 0 + try: + self._target_start = target.start + if self._target_start is not None: + event_filter |= SAX_EVENT_START + except AttributeError: + pass + else: + try: + arguments = inspect_getargspec(self._target_start) + if len(arguments[0]) > 3 or arguments[1] is not None: + self._start_takes_nsmap = 1 + except TypeError: + pass + try: + self._target_end = target.end + if self._target_end is not None: + event_filter |= SAX_EVENT_END + except AttributeError: + pass + try: + self._target_start_ns = target.start_ns + if self._target_start_ns is not None: + event_filter |= SAX_EVENT_START_NS + except AttributeError: + pass + try: + self._target_end_ns = target.end_ns + if self._target_end_ns is not None: + event_filter |= SAX_EVENT_END_NS + except AttributeError: + pass + try: + self._target_data = target.data + if self._target_data is not None: + event_filter |= SAX_EVENT_DATA + except AttributeError: + pass + try: + self._target_doctype = target.doctype + if self._target_doctype is not None: + event_filter |= SAX_EVENT_DOCTYPE + except AttributeError: + pass + try: + self._target_pi = target.pi + if self._target_pi is not None: + event_filter |= SAX_EVENT_PI + except AttributeError: + pass + try: + self._target_comment = target.comment + if self._target_comment is not None: + event_filter |= SAX_EVENT_COMMENT + except AttributeError: + pass + self._sax_event_filter = event_filter + + cdef _handleSaxStart(self, tag, attrib, nsmap): + if self._start_takes_nsmap: + return self._target_start(tag, attrib, nsmap) + else: + return self._target_start(tag, attrib) + + cdef _handleSaxEnd(self, tag): + return self._target_end(tag) + + cdef _handleSaxStartNs(self, prefix, uri): + return self._target_start_ns(prefix, uri) + + cdef _handleSaxEndNs(self, prefix): + return self._target_end_ns(prefix) + + cdef int _handleSaxData(self, data) except -1: + self._target_data(data) + + cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1: + self._target_doctype(root_tag, public_id, system_id) + + cdef _handleSaxPi(self, target, data): + return self._target_pi(target, data) + + cdef _handleSaxComment(self, comment): + return self._target_comment(comment) + + +@cython.final +@cython.internal +@cython.no_gc_clear # Required because parent class uses it - Cython bug. +cdef class _TargetParserContext(_SaxParserContext): + """This class maps SAX2 events to the ET parser target interface. + """ + cdef object _python_target + cdef int _setTarget(self, target) except -1: + self._python_target = target + if not isinstance(target, _SaxParserTarget) or \ + hasattr(target, '__dict__'): + target = _PythonSaxParserTarget(target) + self._setSaxParserTarget(target) + return 0 + + cdef _ParserContext _copy(self): + cdef _TargetParserContext context + context = _ParserContext._copy(self) + context._setTarget(self._python_target) + return context + + cdef void _cleanupTargetParserContext(self, xmlDoc* result) noexcept: + if self._c_ctxt.myDoc is not NULL: + if self._c_ctxt.myDoc is not result and \ + self._c_ctxt.myDoc._private is NULL: + # no _Document proxy => orphen + tree.xmlFreeDoc(self._c_ctxt.myDoc) + self._c_ctxt.myDoc = NULL + + cdef object _handleParseResult(self, _BaseParser parser, xmlDoc* result, + filename): + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER + try: + if self._has_raised(): + self._cleanupTargetParserContext(result) + self._raise_if_stored() + if not self._c_ctxt.wellFormed and not recover: + _raiseParseError(self._c_ctxt, filename, self._error_log) + except: + self._python_target.close() + raise + return self._python_target.close() + + cdef xmlDoc* _handleParseResultDoc(self, _BaseParser parser, + xmlDoc* result, filename) except NULL: + cdef bint recover + recover = parser._parse_options & xmlparser.XML_PARSE_RECOVER + if result is not NULL and result._private is NULL: + # no _Document proxy => orphen + tree.xmlFreeDoc(result) + try: + self._cleanupTargetParserContext(result) + self._raise_if_stored() + if not self._c_ctxt.wellFormed and not recover: + _raiseParseError(self._c_ctxt, filename, self._error_log) + except: + self._python_target.close() + raise + parse_result = self._python_target.close() + raise _TargetParserResult(parse_result) diff --git a/.venv/lib/python3.12/site-packages/lxml/proxy.pxi b/.venv/lib/python3.12/site-packages/lxml/proxy.pxi new file mode 100644 index 00000000..f7b47a73 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/proxy.pxi @@ -0,0 +1,619 @@ +# Proxy functions and low level node allocation stuff + +# Proxies represent elements, their reference is stored in the C +# structure of the respective node to avoid multiple instantiation of +# the Python class. + +@cython.linetrace(False) +@cython.profile(False) +cdef inline _Element getProxy(xmlNode* c_node): + """Get a proxy for a given node. + """ + #print "getProxy for:", <int>c_node + if c_node is not NULL and c_node._private is not NULL: + return <_Element>c_node._private + else: + return None + + +@cython.linetrace(False) +@cython.profile(False) +cdef inline bint hasProxy(xmlNode* c_node): + if c_node._private is NULL: + return False + return True + + +@cython.linetrace(False) +@cython.profile(False) +cdef inline int _registerProxy(_Element proxy, _Document doc, + xmlNode* c_node) except -1: + """Register a proxy and type for the node it's proxying for. + """ + #print "registering for:", <int>proxy._c_node + assert not hasProxy(c_node), "double registering proxy!" + proxy._doc = doc + proxy._c_node = c_node + c_node._private = <void*>proxy + return 0 + + +@cython.linetrace(False) +@cython.profile(False) +cdef inline int _unregisterProxy(_Element proxy) except -1: + """Unregister a proxy for the node it's proxying for. + """ + cdef xmlNode* c_node = proxy._c_node + assert c_node._private is <void*>proxy, "Tried to unregister unknown proxy" + c_node._private = NULL + return 0 + + +################################################################################ +# temporarily make a node the root node of its document + +cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL: + return _plainFakeRootDoc(c_base_doc, c_node, 1) + +cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node, + bint with_siblings) except NULL: + # build a temporary document that has the given node as root node + # note that copy and original must not be modified during its lifetime!! + # always call _destroyFakeDoc() after use! + cdef xmlNode* c_child + cdef xmlNode* c_root + cdef xmlNode* c_new_root + cdef xmlDoc* c_doc + if with_siblings or (c_node.prev is NULL and c_node.next is NULL): + c_root = tree.xmlDocGetRootElement(c_base_doc) + if c_root is c_node: + # already the root node, no siblings + return c_base_doc + + c_doc = _copyDoc(c_base_doc, 0) # non recursive! + c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive! + tree.xmlDocSetRootElement(c_doc, c_new_root) + _copyParentNamespaces(c_node, c_new_root) + + c_new_root.children = c_node.children + c_new_root.last = c_node.last + c_new_root.next = c_new_root.prev = NULL + + # store original node + c_doc._private = c_node + + # divert parent pointers of children + c_child = c_new_root.children + while c_child is not NULL: + c_child.parent = c_new_root + c_child = c_child.next + + c_doc.children = c_new_root + return c_doc + +cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc) noexcept: + # delete a temporary document + cdef xmlNode* c_child + cdef xmlNode* c_parent + cdef xmlNode* c_root + if c_doc is c_base_doc: + return + c_root = tree.xmlDocGetRootElement(c_doc) + + # restore parent pointers of children + c_parent = <xmlNode*>c_doc._private + c_child = c_root.children + while c_child is not NULL: + c_child.parent = c_parent + c_child = c_child.next + + # prevent recursive removal of children + c_root.children = c_root.last = NULL + tree.xmlFreeDoc(c_doc) + +cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element): + """Special element factory for cases where we need to create a fake + root document, but still need to instantiate arbitrary nodes from + it. If we instantiate the fake root node, things will turn bad + when it's destroyed. + + Instead, if we are asked to instantiate the fake root node, we + instantiate the original node instead. + """ + if c_element.doc is not doc._c_doc: + if c_element.doc._private is not NULL: + if c_element is c_element.doc.children: + c_element = <xmlNode*>c_element.doc._private + #assert c_element.type == tree.XML_ELEMENT_NODE + return _elementFactory(doc, c_element) + +################################################################################ +# support for freeing tree elements when proxy objects are destroyed + +cdef int attemptDeallocation(xmlNode* c_node) noexcept: + """Attempt deallocation of c_node (or higher up in tree). + """ + cdef xmlNode* c_top + # could be we actually aren't referring to the tree at all + if c_node is NULL: + #print "not freeing, node is NULL" + return 0 + c_top = getDeallocationTop(c_node) + if c_top is not NULL: + #print "freeing:", c_top.name + _removeText(c_top.next) # tail + tree.xmlFreeNode(c_top) + return 1 + return 0 + +cdef xmlNode* getDeallocationTop(xmlNode* c_node) noexcept: + """Return the top of the tree that can be deallocated, or NULL. + """ + cdef xmlNode* c_next + #print "trying to do deallocating:", c_node.type + if hasProxy(c_node): + #print "Not freeing: proxies still exist" + return NULL + while c_node.parent is not NULL: + c_node = c_node.parent + #print "checking:", c_current.type + if c_node.type == tree.XML_DOCUMENT_NODE or \ + c_node.type == tree.XML_HTML_DOCUMENT_NODE: + #print "not freeing: still in doc" + return NULL + # if we're still attached to the document, don't deallocate + if hasProxy(c_node): + #print "Not freeing: proxies still exist" + return NULL + # see whether we have children to deallocate + if not canDeallocateChildNodes(c_node): + return NULL + # see whether we have siblings to deallocate + c_next = c_node.prev + while c_next: + if _isElement(c_next): + if hasProxy(c_next) or not canDeallocateChildNodes(c_next): + return NULL + c_next = c_next.prev + c_next = c_node.next + while c_next: + if _isElement(c_next): + if hasProxy(c_next) or not canDeallocateChildNodes(c_next): + return NULL + c_next = c_next.next + return c_node + +cdef int canDeallocateChildNodes(xmlNode* c_parent) noexcept: + cdef xmlNode* c_node + c_node = c_parent.children + tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1) + if hasProxy(c_node): + return 0 + tree.END_FOR_EACH_ELEMENT_FROM(c_node) + return 1 + +################################################################################ +# fix _Document references and namespaces when a node changes documents + +cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) noexcept nogil: + """Copy the namespaces of all ancestors of c_from_node to c_to_node. + """ + cdef xmlNode* c_parent + cdef xmlNs* c_ns + cdef xmlNs* c_new_ns + cdef int prefix_known + c_parent = c_from_node.parent + while c_parent and (tree._isElementOrXInclude(c_parent) or + c_parent.type == tree.XML_DOCUMENT_NODE): + c_new_ns = c_parent.nsDef + while c_new_ns: + # libxml2 will check if the prefix is already defined + tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix) + c_new_ns = c_new_ns.next + c_parent = c_parent.parent + + +ctypedef struct _ns_update_map: + xmlNs* old + xmlNs* new + + +ctypedef struct _nscache: + _ns_update_map* ns_map + size_t size + size_t last + + +cdef int _growNsCache(_nscache* c_ns_cache) except -1: + cdef _ns_update_map* ns_map_ptr + if c_ns_cache.size == 0: + c_ns_cache.size = 20 + else: + c_ns_cache.size *= 2 + ns_map_ptr = <_ns_update_map*> python.lxml_realloc( + c_ns_cache.ns_map, c_ns_cache.size, sizeof(_ns_update_map)) + if not ns_map_ptr: + python.lxml_free(c_ns_cache.ns_map) + c_ns_cache.ns_map = NULL + raise MemoryError() + c_ns_cache.ns_map = ns_map_ptr + return 0 + + +cdef inline int _appendToNsCache(_nscache* c_ns_cache, + xmlNs* c_old_ns, xmlNs* c_new_ns) except -1: + if c_ns_cache.last >= c_ns_cache.size: + _growNsCache(c_ns_cache) + c_ns_cache.ns_map[c_ns_cache.last] = _ns_update_map(old=c_old_ns, new=c_new_ns) + c_ns_cache.last += 1 + + +cdef int _stripRedundantNamespaceDeclarations(xmlNode* c_element, _nscache* c_ns_cache, + xmlNs** c_del_ns_list) except -1: + """Removes namespace declarations from an element that are already + defined in its parents. Does not free the xmlNs's, just prepends + them to the c_del_ns_list. + """ + cdef xmlNs* c_ns + cdef xmlNs* c_ns_next + cdef xmlNs** c_nsdef + # use a xmlNs** to handle assignments to "c_element.nsDef" correctly + c_nsdef = &c_element.nsDef + while c_nsdef[0] is not NULL: + c_ns = tree.xmlSearchNsByHref( + c_element.doc, c_element.parent, c_nsdef[0].href) + if c_ns is NULL: + # new namespace href => keep and cache the ns declaration + _appendToNsCache(c_ns_cache, c_nsdef[0], c_nsdef[0]) + c_nsdef = &c_nsdef[0].next + else: + # known namespace href => cache mapping and strip old ns + _appendToNsCache(c_ns_cache, c_nsdef[0], c_ns) + # cut out c_nsdef.next and prepend it to garbage chain + c_ns_next = c_nsdef[0].next + c_nsdef[0].next = c_del_ns_list[0] + c_del_ns_list[0] = c_nsdef[0] + c_nsdef[0] = c_ns_next + return 0 + + +cdef void _cleanUpFromNamespaceAdaptation(xmlNode* c_start_node, + _nscache* c_ns_cache, xmlNs* c_del_ns_list) noexcept: + # Try to recover from exceptions with really bad timing. We were in the middle + # of ripping out xmlNS-es and likely ran out of memory. Try to fix up the tree + # by re-adding the original xmlNs declarations (which might still be used in some + # places). + if c_ns_cache.ns_map: + python.lxml_free(c_ns_cache.ns_map) + if c_del_ns_list: + if not c_start_node.nsDef: + c_start_node.nsDef = c_del_ns_list + else: + c_ns = c_start_node.nsDef + while c_ns.next: + c_ns = c_ns.next + c_ns.next = c_del_ns_list + + +cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc, + xmlNode* c_element) except -1: + """Fix the xmlNs pointers of a node and its subtree that were moved. + + Originally copied from libxml2's xmlReconciliateNs(). Expects + libxml2 doc pointers of node to be correct already, but fixes + _Document references. + + For each node in the subtree, we do this: + + 1) Remove redundant declarations of namespace that are already + defined in its parents. + + 2) Replace namespaces that are *not* defined on the node or its + parents by the equivalent namespace declarations that *are* + defined on the node or its parents (possibly using a different + prefix). If a namespace is unknown, declare a new one on the + node. + + 3) Reassign the names of tags and attribute from the dict of the + target document *iff* it is different from the dict used in the + source subtree. + + 4) Set the Document reference to the new Document (if different). + This is done on backtracking to keep the original Document + alive as long as possible, until all its elements are updated. + + Note that the namespace declarations are removed from the tree in + step 1), but freed only after the complete subtree was traversed + and all occurrences were replaced by tree-internal pointers. + """ + cdef xmlNode* c_start_node + cdef xmlNode* c_node + cdef xmlDoc* c_doc = doc._c_doc + cdef tree.xmlAttr* c_attr + cdef char* c_name + cdef _nscache c_ns_cache = [NULL, 0, 0] + cdef xmlNs* c_del_ns_list = NULL + cdef proxy_count = 0 + + if not tree._isElementOrXInclude(c_element): + return 0 + + c_start_node = c_element + + tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1) + if tree._isElementOrXInclude(c_element): + if hasProxy(c_element): + proxy_count += 1 + + # 1) cut out namespaces defined here that are already known by + # the ancestors + if c_element.nsDef is not NULL: + try: + _stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list) + except: + _cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list) + raise + + # 2) make sure the namespaces of an element and its attributes + # are declared in this document (i.e. on the node or its parents) + if c_element.ns is not NULL: + _fixCNs(doc, c_start_node, c_element, &c_ns_cache, c_del_ns_list) + + c_node = <xmlNode*>c_element.properties + while c_node is not NULL: + if c_node.ns is not NULL: + _fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list) + c_node = c_node.next + + tree.END_FOR_EACH_FROM(c_element) + + # free now unused namespace declarations + if c_del_ns_list is not NULL: + tree.xmlFreeNsList(c_del_ns_list) + + # cleanup + if c_ns_cache.ns_map is not NULL: + python.lxml_free(c_ns_cache.ns_map) + + # 3) fix the names in the tree if we moved it from a different thread + if doc._c_doc.dict is not c_source_doc.dict: + fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict) + + # 4) fix _Document references + # (and potentially deallocate the source document) + if proxy_count > 0: + if proxy_count == 1 and c_start_node._private is not NULL: + proxy = getProxy(c_start_node) + if proxy is not None: + if proxy._doc is not doc: + proxy._doc = doc + else: + fixElementDocument(c_start_node, doc, proxy_count) + else: + fixElementDocument(c_start_node, doc, proxy_count) + + return 0 + + +cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc) noexcept: + """Adaptation of 'xmlSetTreeDoc()' that deep-fixes the document links iteratively. + It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42 + """ + tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1) + if c_node.type == tree.XML_ELEMENT_NODE: + c_attr = <tree.xmlAttr*>c_node.properties + while c_attr: + if c_attr.atype == tree.XML_ATTRIBUTE_ID: + tree.xmlRemoveID(c_node.doc, c_attr) + c_attr.doc = c_doc + _fixDocChildren(c_attr.children, c_doc) + c_attr = c_attr.next + # Set doc link for all nodes, not only elements. + c_node.doc = c_doc + tree.END_FOR_EACH_FROM(c_node) + + +cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc) noexcept: + while c_child: + c_child.doc = c_doc + if c_child.children: + _fixDocChildren(c_child.children, c_doc) + c_child = c_child.next + + +cdef int _fixCNs(_Document doc, xmlNode* c_start_node, xmlNode* c_node, + _nscache* c_ns_cache, xmlNs* c_del_ns_list) except -1: + cdef xmlNs* c_ns = NULL + cdef bint is_prefixed_attr = (c_node.type == tree.XML_ATTRIBUTE_NODE and c_node.ns.prefix) + + for ns_map in c_ns_cache.ns_map[:c_ns_cache.last]: + if c_node.ns is ns_map.old: + if is_prefixed_attr and not ns_map.new.prefix: + # avoid dropping prefix from attributes + continue + c_ns = ns_map.new + break + + if c_ns: + c_node.ns = c_ns + else: + # not in cache or not acceptable + # => find a replacement from this document + try: + c_ns = doc._findOrBuildNodeNs( + c_start_node, c_node.ns.href, c_node.ns.prefix, + c_node.type == tree.XML_ATTRIBUTE_NODE) + c_node.ns = c_ns + _appendToNsCache(c_ns_cache, c_node.ns, c_ns) + except: + _cleanUpFromNamespaceAdaptation(c_start_node, c_ns_cache, c_del_ns_list) + raise + return 0 + + +cdef int fixElementDocument(xmlNode* c_element, _Document doc, + size_t proxy_count) except -1: + cdef xmlNode* c_node = c_element + cdef _Element proxy = None # init-to-None required due to fake-loop below + tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1) + if c_node._private is not NULL: + proxy = getProxy(c_node) + if proxy is not None: + if proxy._doc is not doc: + proxy._doc = doc + proxy_count -= 1 + if proxy_count == 0: + return 0 + tree.END_FOR_EACH_FROM(c_node) + + +cdef void fixThreadDictNames(xmlNode* c_element, + tree.xmlDict* c_src_dict, + tree.xmlDict* c_dict) noexcept nogil: + # re-assign the names of tags and attributes + # + # this should only be called when the element is based on a + # different libxml2 tag name dictionary + if c_element.type == tree.XML_DOCUMENT_NODE or \ + c_element.type == tree.XML_HTML_DOCUMENT_NODE: + # may define "xml" namespace + fixThreadDictNsForNode(c_element, c_src_dict, c_dict) + if c_element.doc.extSubset: + fixThreadDictNamesForDtd(c_element.doc.extSubset, c_src_dict, c_dict) + if c_element.doc.intSubset: + fixThreadDictNamesForDtd(c_element.doc.intSubset, c_src_dict, c_dict) + c_element = c_element.children + while c_element is not NULL: + fixThreadDictNamesForNode(c_element, c_src_dict, c_dict) + c_element = c_element.next + elif tree._isElementOrXInclude(c_element): + fixThreadDictNamesForNode(c_element, c_src_dict, c_dict) + + +cdef inline void _fixThreadDictPtr(const_xmlChar** c_ptr, + tree.xmlDict* c_src_dict, + tree.xmlDict* c_dict) noexcept nogil: + c_str = c_ptr[0] + if c_str and c_src_dict and tree.xmlDictOwns(c_src_dict, c_str): + # return value can be NULL on memory error, but we don't handle that here + c_str = tree.xmlDictLookup(c_dict, c_str, -1) + if c_str: + c_ptr[0] = c_str + + +cdef void fixThreadDictNamesForNode(xmlNode* c_element, + tree.xmlDict* c_src_dict, + tree.xmlDict* c_dict) noexcept nogil: + cdef xmlNode* c_node = c_element + tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1) + if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START): + fixThreadDictNamesForAttributes( + c_node.properties, c_src_dict, c_dict) + fixThreadDictNsForNode(c_node, c_src_dict, c_dict) + _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict) + elif c_node.type == tree.XML_TEXT_NODE: + # libxml2's SAX2 parser interns some indentation space + fixThreadDictContentForNode(c_node, c_src_dict, c_dict) + elif c_node.type == tree.XML_COMMENT_NODE: + pass # don't touch c_node.name + else: + _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict) + tree.END_FOR_EACH_FROM(c_node) + + +cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr, + tree.xmlDict* c_src_dict, + tree.xmlDict* c_dict) noexcept nogil: + cdef xmlNode* c_child + cdef xmlNode* c_node = <xmlNode*>c_attr + while c_node is not NULL: + if c_node.type not in (tree.XML_TEXT_NODE, tree.XML_COMMENT_NODE): + _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict) + # libxml2 keeps some (!) attribute values in the dict + c_child = c_node.children + while c_child is not NULL: + fixThreadDictContentForNode(c_child, c_src_dict, c_dict) + c_child = c_child.next + c_node = c_node.next + + +cdef inline void fixThreadDictContentForNode(xmlNode* c_node, + tree.xmlDict* c_src_dict, + tree.xmlDict* c_dict) noexcept nogil: + if c_node.content is not NULL and \ + c_node.content is not <xmlChar*>&c_node.properties: + if tree.xmlDictOwns(c_src_dict, c_node.content): + # result can be NULL on memory error, but we don't handle that here + c_node.content = <xmlChar*>tree.xmlDictLookup(c_dict, c_node.content, -1) + + +cdef inline void fixThreadDictNsForNode(xmlNode* c_node, + tree.xmlDict* c_src_dict, + tree.xmlDict* c_dict) noexcept nogil: + cdef xmlNs* c_ns = c_node.nsDef + while c_ns is not NULL: + _fixThreadDictPtr(&c_ns.href, c_src_dict, c_dict) + _fixThreadDictPtr(&c_ns.prefix, c_src_dict, c_dict) + c_ns = c_ns.next + + +cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd, + tree.xmlDict* c_src_dict, + tree.xmlDict* c_dict) noexcept nogil: + cdef xmlNode* c_node + cdef tree.xmlElement* c_element + cdef tree.xmlAttribute* c_attribute + cdef tree.xmlEntity* c_entity + + c_node = c_dtd.children + while c_node: + if c_node.type == tree.XML_ELEMENT_DECL: + c_element = <tree.xmlElement*>c_node + if c_element.content: + _fixThreadDictPtr(&c_element.content.name, c_src_dict, c_dict) + _fixThreadDictPtr(&c_element.content.prefix, c_src_dict, c_dict) + c_attribute = c_element.attributes + while c_attribute: + _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict) + _fixThreadDictPtr(&c_attribute.name, c_src_dict, c_dict) + _fixThreadDictPtr(&c_attribute.prefix, c_src_dict, c_dict) + _fixThreadDictPtr(&c_attribute.elem, c_src_dict, c_dict) + c_attribute = c_attribute.nexth + elif c_node.type == tree.XML_ENTITY_DECL: + c_entity = <tree.xmlEntity*>c_node + _fixThreadDictPtr(&c_entity.name, c_src_dict, c_dict) + _fixThreadDictPtr(&c_entity.ExternalID, c_src_dict, c_dict) + _fixThreadDictPtr(&c_entity.SystemID, c_src_dict, c_dict) + _fixThreadDictPtr(<const_xmlChar**>&c_entity.content, c_src_dict, c_dict) + c_node = c_node.next + + +################################################################################ +# adopt an xmlDoc from an external libxml2 document source + +cdef _Document _adoptForeignDoc(xmlDoc* c_doc, _BaseParser parser=None, bint is_owned=True): + """Convert and wrap an externally produced xmlDoc for use in lxml. + Assures that all '_private' pointers are NULL to prevent accidental + dereference into lxml proxy objects. + """ + if c_doc is NULL: + raise ValueError("Illegal document provided: NULL") + if c_doc.type not in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE): + doc_type = c_doc.type + if is_owned: + tree.xmlFreeDoc(c_doc) + raise ValueError(f"Illegal document provided: expected XML or HTML, found {doc_type}") + + cdef xmlNode* c_node = <xmlNode*>c_doc + + if is_owned: + tree.BEGIN_FOR_EACH_FROM(<xmlNode*>c_doc, c_node, 1) + c_node._private = NULL + tree.END_FOR_EACH_FROM(c_node) + else: + # create a fresh copy that lxml owns + c_doc = tree.xmlCopyDoc(c_doc, 1) + if c_doc is NULL: + raise MemoryError() + + return _documentFactory(c_doc, parser) diff --git a/.venv/lib/python3.12/site-packages/lxml/public-api.pxi b/.venv/lib/python3.12/site-packages/lxml/public-api.pxi new file mode 100644 index 00000000..fb8b2a2c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/public-api.pxi @@ -0,0 +1,178 @@ +# Public C API for lxml.etree + +cdef public api _Element deepcopyNodeToDocument(_Document doc, xmlNode* c_root): + "Recursively copy the element into the document. doc is not modified." + cdef xmlNode* c_node + c_node = _copyNodeToDoc(c_root, doc._c_doc) + return _elementFactory(doc, c_node) + +cdef public api _ElementTree elementTreeFactory(_Element context_node): + _assertValidNode(context_node) + return newElementTree(context_node, _ElementTree) + +cdef public api _ElementTree newElementTree(_Element context_node, + object subclass): + if <void*>context_node is NULL or context_node is None: + raise TypeError + _assertValidNode(context_node) + return _newElementTree(context_node._doc, context_node, subclass) + +cdef public api _ElementTree adoptExternalDocument(xmlDoc* c_doc, parser, bint is_owned): + if c_doc is NULL: + raise TypeError + doc = _adoptForeignDoc(c_doc, parser, is_owned) + return _elementTreeFactory(doc, None) + +cdef public api _Element elementFactory(_Document doc, xmlNode* c_node): + if c_node is NULL or doc is None: + raise TypeError + return _elementFactory(doc, c_node) + +cdef public api _Element makeElement(tag, _Document doc, parser, + text, tail, attrib, nsmap): + return _makeElement(tag, NULL, doc, parser, text, tail, attrib, nsmap, None) + +cdef public api _Element makeSubElement(_Element parent, tag, text, tail, + attrib, nsmap): + _assertValidNode(parent) + return _makeSubElement(parent, tag, text, tail, attrib, nsmap, None) + +cdef public api void setElementClassLookupFunction( + _element_class_lookup_function function, state): + _setElementClassLookupFunction(function, state) + +cdef public api object lookupDefaultElementClass(state, doc, xmlNode* c_node): + return _lookupDefaultElementClass(state, doc, c_node) + +cdef public api object lookupNamespaceElementClass(state, doc, xmlNode* c_node): + return _find_nselement_class(state, doc, c_node) + +cdef public api object callLookupFallback(FallbackElementClassLookup lookup, + _Document doc, xmlNode* c_node): + return _callLookupFallback(lookup, doc, c_node) + +cdef public api int tagMatches(xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name): + if c_node is NULL: + return -1 + return _tagMatches(c_node, c_href, c_name) + +cdef public api _Document documentOrRaise(object input): + return _documentOrRaise(input) + +cdef public api _Element rootNodeOrRaise(object input): + return _rootNodeOrRaise(input) + +cdef public api bint hasText(xmlNode* c_node): + return _hasText(c_node) + +cdef public api bint hasTail(xmlNode* c_node): + return _hasTail(c_node) + +cdef public api unicode textOf(xmlNode* c_node): + if c_node is NULL: + return None + return _collectText(c_node.children) + +cdef public api unicode tailOf(xmlNode* c_node): + if c_node is NULL: + return None + return _collectText(c_node.next) + +cdef public api int setNodeText(xmlNode* c_node, text) except -1: + if c_node is NULL: + raise ValueError + return _setNodeText(c_node, text) + +cdef public api int setTailText(xmlNode* c_node, text) except -1: + if c_node is NULL: + raise ValueError + return _setTailText(c_node, text) + +cdef public api unicode attributeValue(xmlNode* c_element, xmlAttr* c_attrib_node): + return _attributeValue(c_element, c_attrib_node) + +cdef public api unicode attributeValueFromNsName(xmlNode* c_element, + const_xmlChar* ns, const_xmlChar* name): + return _attributeValueFromNsName(c_element, ns, name) + +cdef public api object getAttributeValue(_Element element, key, default): + _assertValidNode(element) + return _getAttributeValue(element, key, default) + +cdef public api object iterattributes(_Element element, int keysvalues): + _assertValidNode(element) + return _attributeIteratorFactory(element, keysvalues) + +cdef public api list collectAttributes(xmlNode* c_element, int keysvalues): + return _collectAttributes(c_element, keysvalues) + +cdef public api int setAttributeValue(_Element element, key, value) except -1: + _assertValidNode(element) + return _setAttributeValue(element, key, value) + +cdef public api int delAttribute(_Element element, key) except -1: + _assertValidNode(element) + return _delAttribute(element, key) + +cdef public api int delAttributeFromNsName(tree.xmlNode* c_element, + const_xmlChar* c_href, const_xmlChar* c_name): + return _delAttributeFromNsName(c_element, c_href, c_name) + +cdef public api bint hasChild(xmlNode* c_node): + return _hasChild(c_node) + +cdef public api xmlNode* findChild(xmlNode* c_node, Py_ssize_t index): + return _findChild(c_node, index) + +cdef public api xmlNode* findChildForwards(xmlNode* c_node, Py_ssize_t index): + return _findChildForwards(c_node, index) + +cdef public api xmlNode* findChildBackwards(xmlNode* c_node, Py_ssize_t index): + return _findChildBackwards(c_node, index) + +cdef public api xmlNode* nextElement(xmlNode* c_node): + return _nextElement(c_node) + +cdef public api xmlNode* previousElement(xmlNode* c_node): + return _previousElement(c_node) + +cdef public api void appendChild(_Element parent, _Element child): + # deprecated, use appendChildToElement() instead! + _appendChild(parent, child) + +cdef public api int appendChildToElement(_Element parent, _Element child) except -1: + return _appendChild(parent, child) + +cdef public api unicode pyunicode(const_xmlChar* s): + if s is NULL: + raise TypeError + return funicode(s) + +cdef public api bytes utf8(object s): + return _utf8(s) + +cdef public api tuple getNsTag(object tag): + return _getNsTag(tag) + +cdef public api tuple getNsTagWithEmptyNs(object tag): + return _getNsTagWithEmptyNs(tag) + +cdef public api unicode namespacedName(xmlNode* c_node): + return _namespacedName(c_node) + +cdef public api unicode namespacedNameFromNsName(const_xmlChar* href, const_xmlChar* name): + return _namespacedNameFromNsName(href, name) + +cdef public api void iteratorStoreNext(_ElementIterator iterator, _Element node): + # deprecated! + iterator._storeNext(node) + +cdef public api void initTagMatch(_ElementTagMatcher matcher, tag): + # deprecated! + matcher._initTagMatch(tag) + +cdef public api tree.xmlNs* findOrBuildNodeNsPrefix( + _Document doc, xmlNode* c_node, const_xmlChar* href, const_xmlChar* prefix) except NULL: + if doc is None: + raise TypeError + return doc._findOrBuildNodeNs(c_node, href, prefix, 0) diff --git a/.venv/lib/python3.12/site-packages/lxml/pyclasslookup.py b/.venv/lib/python3.12/site-packages/lxml/pyclasslookup.py new file mode 100644 index 00000000..9e1496df --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/pyclasslookup.py @@ -0,0 +1,3 @@ +# dummy module for backwards compatibility + +from lxml.etree import PythonElementClassLookup diff --git a/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi b/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi new file mode 100644 index 00000000..9bc9a660 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/readonlytree.pxi @@ -0,0 +1,565 @@ +# read-only tree implementation + +@cython.internal +cdef class _ReadOnlyProxy: + "A read-only proxy class suitable for PIs/Comments (for internal use only!)." + cdef bint _free_after_use + cdef xmlNode* _c_node + cdef _ReadOnlyProxy _source_proxy + cdef list _dependent_proxies + def __cinit__(self): + self._c_node = NULL + self._free_after_use = 0 + + cdef int _assertNode(self) except -1: + """This is our way of saying: this proxy is invalid! + """ + if not self._c_node: + raise ReferenceError("Proxy invalidated!") + return 0 + + cdef int _raise_unsupported_type(self) except -1: + raise TypeError(f"Unsupported node type: {self._c_node.type}") + + cdef void free_after_use(self) noexcept: + """Should the xmlNode* be freed when releasing the proxy? + """ + self._free_after_use = 1 + + @property + def tag(self): + """Element tag + """ + self._assertNode() + if self._c_node.type == tree.XML_ELEMENT_NODE: + return _namespacedName(self._c_node) + elif self._c_node.type == tree.XML_PI_NODE: + return ProcessingInstruction + elif self._c_node.type == tree.XML_COMMENT_NODE: + return Comment + elif self._c_node.type == tree.XML_ENTITY_REF_NODE: + return Entity + else: + self._raise_unsupported_type() + + @property + def text(self): + """Text before the first subelement. This is either a string or + the value None, if there was no text. + """ + self._assertNode() + if self._c_node.type == tree.XML_ELEMENT_NODE: + return _collectText(self._c_node.children) + elif self._c_node.type in (tree.XML_PI_NODE, + tree.XML_COMMENT_NODE): + if self._c_node.content is NULL: + return '' + else: + return funicode(self._c_node.content) + elif self._c_node.type == tree.XML_ENTITY_REF_NODE: + return f'&{funicode(self._c_node.name)};' + else: + self._raise_unsupported_type() + + @property + def tail(self): + """Text after this element's end tag, but before the next sibling + element's start tag. This is either a string or the value None, if + there was no text. + """ + self._assertNode() + return _collectText(self._c_node.next) + + @property + def sourceline(self): + """Original line number as found by the parser or None if unknown. + """ + cdef long line + self._assertNode() + line = tree.xmlGetLineNo(self._c_node) + if line > 0: + return line + else: + return None + + def __repr__(self): + self._assertNode() + if self._c_node.type == tree.XML_ELEMENT_NODE: + return "<Element %s at 0x%x>" % (self.tag, id(self)) + elif self._c_node.type == tree.XML_COMMENT_NODE: + return "<!--%s-->" % self.text + elif self._c_node.type == tree.XML_ENTITY_NODE: + return "&%s;" % funicode(self._c_node.name) + elif self._c_node.type == tree.XML_PI_NODE: + text = self.text + if text: + return "<?%s %s?>" % (self.target, text) + else: + return "<?%s?>" % self.target + else: + self._raise_unsupported_type() + + def __getitem__(self, x): + """Returns the subelement at the given position or the requested + slice. + """ + cdef xmlNode* c_node = NULL + cdef Py_ssize_t step = 0, slicelength = 0 + cdef Py_ssize_t c, i + cdef _node_to_node_function next_element + cdef list result + self._assertNode() + if isinstance(x, slice): + # slicing + if _isFullSlice(<slice>x): + return _collectChildren(self) + _findChildSlice(<slice>x, self._c_node, &c_node, &step, &slicelength) + if c_node is NULL: + return [] + if step > 0: + next_element = _nextElement + else: + step = -step + next_element = _previousElement + result = [] + c = 0 + while c_node is not NULL and c < slicelength: + result.append(_newReadOnlyProxy(self._source_proxy, c_node)) + result.append(_elementFactory(self._doc, c_node)) + c = c + 1 + for i from 0 <= i < step: + c_node = next_element(c_node) + return result + else: + # indexing + c_node = _findChild(self._c_node, x) + if c_node is NULL: + raise IndexError, "list index out of range" + return _newReadOnlyProxy(self._source_proxy, c_node) + + def __len__(self): + """Returns the number of subelements. + """ + cdef Py_ssize_t c + cdef xmlNode* c_node + self._assertNode() + c = 0 + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + c = c + 1 + c_node = c_node.next + return c + + def __bool__(self): + cdef xmlNode* c_node + self._assertNode() + c_node = _findChildBackwards(self._c_node, 0) + return c_node != NULL + + def __deepcopy__(self, memo): + "__deepcopy__(self, memo)" + return self.__copy__() + + cpdef __copy__(self): + "__copy__(self)" + cdef xmlDoc* c_doc + cdef xmlNode* c_node + cdef _Document new_doc + if self._c_node is NULL: + return self + c_doc = _copyDocRoot(self._c_node.doc, self._c_node) # recursive + new_doc = _documentFactory(c_doc, None) + root = new_doc.getroot() + if root is not None: + return root + # Comment/PI + c_node = c_doc.children + while c_node is not NULL and c_node.type != self._c_node.type: + c_node = c_node.next + if c_node is NULL: + return None + return _elementFactory(new_doc, c_node) + + def __iter__(self): + return iter(self.getchildren()) + + def iterchildren(self, tag=None, *, reversed=False): + """iterchildren(self, tag=None, reversed=False) + + Iterate over the children of this element. + """ + children = self.getchildren() + if tag is not None and tag != '*': + children = [ el for el in children if el.tag == tag ] + if reversed: + children = children[::-1] + return iter(children) + + cpdef getchildren(self): + """Returns all subelements. The elements are returned in document + order. + """ + cdef xmlNode* c_node + cdef list result + self._assertNode() + result = [] + c_node = self._c_node.children + while c_node is not NULL: + if tree._isElement(c_node): + result.append(_newReadOnlyProxy(self._source_proxy, c_node)) + c_node = c_node.next + return result + + def getparent(self): + """Returns the parent of this element or None for the root element. + """ + cdef xmlNode* c_parent + self._assertNode() + c_parent = self._c_node.parent + if c_parent is NULL or not tree._isElement(c_parent): + return None + else: + return _newReadOnlyProxy(self._source_proxy, c_parent) + + def getnext(self): + """Returns the following sibling of this element or None. + """ + cdef xmlNode* c_node + self._assertNode() + c_node = _nextElement(self._c_node) + if c_node is not NULL: + return _newReadOnlyProxy(self._source_proxy, c_node) + return None + + def getprevious(self): + """Returns the preceding sibling of this element or None. + """ + cdef xmlNode* c_node + self._assertNode() + c_node = _previousElement(self._c_node) + if c_node is not NULL: + return _newReadOnlyProxy(self._source_proxy, c_node) + return None + + +@cython.final +@cython.internal +cdef class _ReadOnlyPIProxy(_ReadOnlyProxy): + """A read-only proxy for processing instructions (for internal use only!)""" + @property + def target(self): + self._assertNode() + return funicode(self._c_node.name) + +@cython.final +@cython.internal +cdef class _ReadOnlyEntityProxy(_ReadOnlyProxy): + """A read-only proxy for entity references (for internal use only!)""" + property name: + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value_utf = _utf8(value) + if '&' in value or ';' in value: + raise ValueError(f"Invalid entity name '{value}'") + tree.xmlNodeSetName(self._c_node, _xcstr(value_utf)) + + @property + def text(self): + return f'&{funicode(self._c_node.name)};' + + +@cython.internal +cdef class _ReadOnlyElementProxy(_ReadOnlyProxy): + """The main read-only Element proxy class (for internal use only!).""" + + @property + def attrib(self): + self._assertNode() + return dict(_collectAttributes(self._c_node, 3)) + + @property + def prefix(self): + """Namespace prefix or None. + """ + self._assertNode() + if self._c_node.ns is not NULL: + if self._c_node.ns.prefix is not NULL: + return funicode(self._c_node.ns.prefix) + return None + + @property + def nsmap(self): + """Namespace prefix->URI mapping known in the context of this + Element. This includes all namespace declarations of the + parents. + + Note that changing the returned dict has no effect on the Element. + """ + self._assertNode() + return _build_nsmap(self._c_node) + + def get(self, key, default=None): + """Gets an element attribute. + """ + self._assertNode() + return _getNodeAttributeValue(self._c_node, key, default) + + def keys(self): + """Gets a list of attribute names. The names are returned in an + arbitrary order (just like for an ordinary Python dictionary). + """ + self._assertNode() + return _collectAttributes(self._c_node, 1) + + def values(self): + """Gets element attributes, as a sequence. The attributes are returned + in an arbitrary order. + """ + self._assertNode() + return _collectAttributes(self._c_node, 2) + + def items(self): + """Gets element attributes, as a sequence. The attributes are returned + in an arbitrary order. + """ + self._assertNode() + return _collectAttributes(self._c_node, 3) + +cdef _ReadOnlyProxy _newReadOnlyProxy( + _ReadOnlyProxy source_proxy, xmlNode* c_node): + cdef _ReadOnlyProxy el + if c_node.type == tree.XML_ELEMENT_NODE: + el = _ReadOnlyElementProxy.__new__(_ReadOnlyElementProxy) + elif c_node.type == tree.XML_PI_NODE: + el = _ReadOnlyPIProxy.__new__(_ReadOnlyPIProxy) + elif c_node.type in (tree.XML_COMMENT_NODE, + tree.XML_ENTITY_REF_NODE): + el = _ReadOnlyProxy.__new__(_ReadOnlyProxy) + else: + raise TypeError(f"Unsupported element type: {c_node.type}") + el._c_node = c_node + _initReadOnlyProxy(el, source_proxy) + return el + +cdef inline _initReadOnlyProxy(_ReadOnlyProxy el, + _ReadOnlyProxy source_proxy): + if source_proxy is None: + el._source_proxy = el + el._dependent_proxies = [el] + else: + el._source_proxy = source_proxy + source_proxy._dependent_proxies.append(el) + +cdef _freeReadOnlyProxies(_ReadOnlyProxy sourceProxy): + cdef xmlNode* c_node + cdef _ReadOnlyProxy el + if sourceProxy is None: + return + if sourceProxy._dependent_proxies is None: + return + for el in sourceProxy._dependent_proxies: + c_node = el._c_node + el._c_node = NULL + if el._free_after_use: + tree.xmlFreeNode(c_node) + del sourceProxy._dependent_proxies[:] + +# opaque wrapper around non-element nodes, e.g. the document node +# +# This class does not imply any restrictions on modifiability or +# read-only status of the node, so use with caution. + +@cython.internal +cdef class _OpaqueNodeWrapper: + cdef tree.xmlNode* _c_node + def __init__(self): + raise TypeError, "This type cannot be instantiated from Python" + +@cython.final +@cython.internal +cdef class _OpaqueDocumentWrapper(_OpaqueNodeWrapper): + cdef int _assertNode(self) except -1: + """This is our way of saying: this proxy is invalid! + """ + assert self._c_node is not NULL, "Proxy invalidated!" + return 0 + + cpdef append(self, other_element): + """Append a copy of an Element to the list of children. + """ + cdef xmlNode* c_next + cdef xmlNode* c_node + self._assertNode() + c_node = _roNodeOf(other_element) + if c_node.type == tree.XML_ELEMENT_NODE: + if tree.xmlDocGetRootElement(<tree.xmlDoc*>self._c_node) is not NULL: + raise ValueError, "cannot append, document already has a root element" + elif c_node.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE): + raise TypeError, f"unsupported element type for top-level node: {c_node.type}" + c_node = _copyNodeToDoc(c_node, <tree.xmlDoc*>self._c_node) + c_next = c_node.next + tree.xmlAddChild(self._c_node, c_node) + _moveTail(c_next, c_node) + + def extend(self, elements): + """Append a copy of all Elements from a sequence to the list of + children. + """ + self._assertNode() + for element in elements: + self.append(element) + +cdef _OpaqueNodeWrapper _newOpaqueAppendOnlyNodeWrapper(xmlNode* c_node): + cdef _OpaqueNodeWrapper node + if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE): + node = _OpaqueDocumentWrapper.__new__(_OpaqueDocumentWrapper) + else: + node = _OpaqueNodeWrapper.__new__(_OpaqueNodeWrapper) + node._c_node = c_node + return node + +# element proxies that allow restricted modification + +@cython.internal +cdef class _ModifyContentOnlyProxy(_ReadOnlyProxy): + """A read-only proxy that allows changing the text content. + """ + property text: + def __get__(self): + self._assertNode() + if self._c_node.content is NULL: + return '' + else: + return funicode(self._c_node.content) + + def __set__(self, value): + cdef tree.xmlDict* c_dict + self._assertNode() + if value is None: + c_text = <const_xmlChar*>NULL + else: + value = _utf8(value) + c_text = _xcstr(value) + tree.xmlNodeSetContent(self._c_node, c_text) + +@cython.final +@cython.internal +cdef class _ModifyContentOnlyPIProxy(_ModifyContentOnlyProxy): + """A read-only proxy that allows changing the text/target content of a + processing instruction. + """ + property target: + def __get__(self): + self._assertNode() + return funicode(self._c_node.name) + + def __set__(self, value): + self._assertNode() + value = _utf8(value) + c_text = _xcstr(value) + tree.xmlNodeSetName(self._c_node, c_text) + +@cython.final +@cython.internal +cdef class _ModifyContentOnlyEntityProxy(_ModifyContentOnlyProxy): + "A read-only proxy for entity references (for internal use only!)" + property name: + def __get__(self): + return funicode(self._c_node.name) + + def __set__(self, value): + value = _utf8(value) + assert '&' not in value and ';' not in value, \ + f"Invalid entity name '{value}'" + c_text = _xcstr(value) + tree.xmlNodeSetName(self._c_node, c_text) + + +@cython.final +@cython.internal +cdef class _AppendOnlyElementProxy(_ReadOnlyElementProxy): + """A read-only element that allows adding children and changing the + text content (i.e. everything that adds to the subtree). + """ + cpdef append(self, other_element): + """Append a copy of an Element to the list of children. + """ + cdef xmlNode* c_next + cdef xmlNode* c_node + self._assertNode() + c_node = _roNodeOf(other_element) + c_node = _copyNodeToDoc(c_node, self._c_node.doc) + c_next = c_node.next + tree.xmlAddChild(self._c_node, c_node) + _moveTail(c_next, c_node) + + def extend(self, elements): + """Append a copy of all Elements from a sequence to the list of + children. + """ + self._assertNode() + for element in elements: + self.append(element) + + property text: + """Text before the first subelement. This is either a string or the + value None, if there was no text. + """ + def __get__(self): + self._assertNode() + return _collectText(self._c_node.children) + + def __set__(self, value): + self._assertNode() + if isinstance(value, QName): + value = _resolveQNameText(self, value).decode('utf8') + _setNodeText(self._c_node, value) + + +cdef _ReadOnlyProxy _newAppendOnlyProxy( + _ReadOnlyProxy source_proxy, xmlNode* c_node): + cdef _ReadOnlyProxy el + if c_node.type == tree.XML_ELEMENT_NODE: + el = _AppendOnlyElementProxy.__new__(_AppendOnlyElementProxy) + elif c_node.type == tree.XML_PI_NODE: + el = _ModifyContentOnlyPIProxy.__new__(_ModifyContentOnlyPIProxy) + elif c_node.type == tree.XML_COMMENT_NODE: + el = _ModifyContentOnlyProxy.__new__(_ModifyContentOnlyProxy) + else: + raise TypeError(f"Unsupported element type: {c_node.type}") + el._c_node = c_node + _initReadOnlyProxy(el, source_proxy) + return el + +cdef xmlNode* _roNodeOf(element) except NULL: + cdef xmlNode* c_node + if isinstance(element, _Element): + c_node = (<_Element>element)._c_node + elif isinstance(element, _ReadOnlyProxy): + c_node = (<_ReadOnlyProxy>element)._c_node + elif isinstance(element, _OpaqueNodeWrapper): + c_node = (<_OpaqueNodeWrapper>element)._c_node + else: + raise TypeError, f"invalid argument type {type(element)}" + + if c_node is NULL: + raise TypeError, "invalid element" + return c_node + +cdef xmlNode* _nonRoNodeOf(element) except NULL: + cdef xmlNode* c_node + if isinstance(element, _Element): + c_node = (<_Element>element)._c_node + elif isinstance(element, _AppendOnlyElementProxy): + c_node = (<_AppendOnlyElementProxy>element)._c_node + elif isinstance(element, _OpaqueNodeWrapper): + c_node = (<_OpaqueNodeWrapper>element)._c_node + else: + raise TypeError, f"invalid argument type {type(element)}" + + if c_node is NULL: + raise TypeError, "invalid element" + return c_node diff --git a/.venv/lib/python3.12/site-packages/lxml/relaxng.pxi b/.venv/lib/python3.12/site-packages/lxml/relaxng.pxi new file mode 100644 index 00000000..35f87589 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/relaxng.pxi @@ -0,0 +1,165 @@ +# support for RelaxNG validation +from lxml.includes cimport relaxng + +cdef object _rnc2rng +try: + import rnc2rng as _rnc2rng +except ImportError: + _rnc2rng = None + + +cdef int _require_rnc2rng() except -1: + if _rnc2rng is None: + raise RelaxNGParseError( + 'compact syntax not supported (please install rnc2rng)') + return 0 + + +cdef class RelaxNGError(LxmlError): + """Base class for RelaxNG errors. + """ + +cdef class RelaxNGParseError(RelaxNGError): + """Error while parsing an XML document as RelaxNG. + """ + +cdef class RelaxNGValidateError(RelaxNGError): + """Error while validating an XML document with a RelaxNG schema. + """ + + +################################################################################ +# RelaxNG + +cdef class RelaxNG(_Validator): + """RelaxNG(self, etree=None, file=None) + Turn a document into a Relax NG validator. + + Either pass a schema as Element or ElementTree, or pass a file or + filename through the ``file`` keyword argument. + """ + cdef relaxng.xmlRelaxNG* _c_schema + def __cinit__(self): + self._c_schema = NULL + + def __init__(self, etree=None, *, file=None): + cdef _Document doc + cdef _Element root_node + cdef xmlDoc* fake_c_doc = NULL + cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt = NULL + _Validator.__init__(self) + if etree is not None: + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(fake_c_doc) + elif file is not None: + if _isString(file): + if file[-4:].lower() == '.rnc': + _require_rnc2rng() + rng_data_utf8 = _utf8(_rnc2rng.dumps(_rnc2rng.load(file))) + doc = _parseMemoryDocument(rng_data_utf8, parser=None, url=file) + parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc) + else: + doc = None + filename = _encodeFilename(file) + with self._error_log: + orig_loader = _register_document_loader() + parser_ctxt = relaxng.xmlRelaxNGNewParserCtxt(_cstr(filename)) + _reset_document_loader(orig_loader) + elif (_getFilenameForFile(file) or '')[-4:].lower() == '.rnc': + _require_rnc2rng() + rng_data_utf8 = _utf8(_rnc2rng.dumps(_rnc2rng.load(file))) + doc = _parseMemoryDocument( + rng_data_utf8, parser=None, url=_getFilenameForFile(file)) + parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc) + else: + doc = _parseDocument(file, parser=None, base_url=None) + parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc) + else: + raise RelaxNGParseError, "No tree or file given" + + if parser_ctxt is NULL: + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) + raise RelaxNGParseError( + self._error_log._buildExceptionMessage( + "Document is not parsable as Relax NG"), + self._error_log) + + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + relaxng.xmlRelaxNGSetParserStructuredErrors( + parser_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log) + _connectGenericErrorLog(self._error_log, xmlerror.XML_FROM_RELAXNGP) + self._c_schema = relaxng.xmlRelaxNGParse(parser_ctxt) + _connectGenericErrorLog(None) + + relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt) + if self._c_schema is NULL: + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) + raise RelaxNGParseError( + self._error_log._buildExceptionMessage( + "Document is not valid Relax NG"), + self._error_log) + if fake_c_doc is not NULL: + _destroyFakeDoc(doc._c_doc, fake_c_doc) + + def __dealloc__(self): + relaxng.xmlRelaxNGFree(self._c_schema) + + def __call__(self, etree): + """__call__(self, etree) + + Validate doc using Relax NG. + + Returns true if document is valid, false if not.""" + cdef _Document doc + cdef _Element root_node + cdef xmlDoc* c_doc + cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt + cdef int ret + + assert self._c_schema is not NULL, "RelaxNG instance not initialised" + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + + valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema) + if valid_ctxt is NULL: + raise MemoryError() + + try: + self._error_log.clear() + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + relaxng.xmlRelaxNGSetValidStructuredErrors( + valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log) + _connectGenericErrorLog(self._error_log, xmlerror.XML_FROM_RELAXNGV) + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + with nogil: + ret = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_doc) + _destroyFakeDoc(doc._c_doc, c_doc) + finally: + _connectGenericErrorLog(None) + relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt) + + if ret == -1: + raise RelaxNGValidateError( + "Internal error in Relax NG validation", + self._error_log) + if ret == 0: + return True + else: + return False + + @classmethod + def from_rnc_string(cls, src, base_url=None): + """Parse a RelaxNG schema in compact syntax from a text string + + Requires the rnc2rng package to be installed. + + Passing the source URL or file path of the source as 'base_url' + will enable resolving resource references relative to the source. + """ + _require_rnc2rng() + rng_str = utf8(_rnc2rng.dumps(_rnc2rng.loads(src))) + return cls(_parseMemoryDocument(rng_str, parser=None, url=base_url)) diff --git a/.venv/lib/python3.12/site-packages/lxml/sax.cpython-312-x86_64-linux-gnu.so b/.venv/lib/python3.12/site-packages/lxml/sax.cpython-312-x86_64-linux-gnu.so Binary files differnew file mode 100755 index 00000000..417417e6 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/sax.cpython-312-x86_64-linux-gnu.so diff --git a/.venv/lib/python3.12/site-packages/lxml/sax.py b/.venv/lib/python3.12/site-packages/lxml/sax.py new file mode 100644 index 00000000..eee44226 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/sax.py @@ -0,0 +1,275 @@ +# cython: language_level=2 + +""" +SAX-based adapter to copy trees from/to the Python standard library. + +Use the `ElementTreeContentHandler` class to build an ElementTree from +SAX events. + +Use the `ElementTreeProducer` class or the `saxify()` function to fire +the SAX events of an ElementTree against a SAX ContentHandler. + +See https://lxml.de/sax.html +""" + + +from xml.sax.handler import ContentHandler +from lxml import etree +from lxml.etree import ElementTree, SubElement +from lxml.etree import Comment, ProcessingInstruction + + +class SaxError(etree.LxmlError): + """General SAX error. + """ + + +def _getNsTag(tag): + if tag[0] == '{': + return tuple(tag[1:].split('}', 1)) + else: + return None, tag + + +class ElementTreeContentHandler(ContentHandler): + """Build an lxml ElementTree from SAX events. + """ + def __init__(self, makeelement=None): + ContentHandler.__init__(self) + self._root = None + self._root_siblings = [] + self._element_stack = [] + self._default_ns = None + self._ns_mapping = { None : [None] } + self._new_mappings = {} + if makeelement is None: + makeelement = etree.Element + self._makeelement = makeelement + + def _get_etree(self): + "Contains the generated ElementTree after parsing is finished." + return ElementTree(self._root) + + etree = property(_get_etree, doc=_get_etree.__doc__) + + def setDocumentLocator(self, locator): + pass + + def startDocument(self): + pass + + def endDocument(self): + pass + + def startPrefixMapping(self, prefix, uri): + self._new_mappings[prefix] = uri + try: + self._ns_mapping[prefix].append(uri) + except KeyError: + self._ns_mapping[prefix] = [uri] + if prefix is None: + self._default_ns = uri + + def endPrefixMapping(self, prefix): + ns_uri_list = self._ns_mapping[prefix] + ns_uri_list.pop() + if prefix is None: + self._default_ns = ns_uri_list[-1] + + def _buildTag(self, ns_name_tuple): + ns_uri, local_name = ns_name_tuple + if ns_uri: + el_tag = "{%s}%s" % ns_name_tuple + elif self._default_ns: + el_tag = "{%s}%s" % (self._default_ns, local_name) + else: + el_tag = local_name + return el_tag + + def startElementNS(self, ns_name, qname, attributes=None): + el_name = self._buildTag(ns_name) + if attributes: + attrs = {} + try: + iter_attributes = attributes.iteritems() + except AttributeError: + iter_attributes = attributes.items() + + for name_tuple, value in iter_attributes: + if name_tuple[0]: + attr_name = "{%s}%s" % name_tuple + else: + attr_name = name_tuple[1] + attrs[attr_name] = value + else: + attrs = None + + element_stack = self._element_stack + if self._root is None: + element = self._root = \ + self._makeelement(el_name, attrs, self._new_mappings) + if self._root_siblings and hasattr(element, 'addprevious'): + for sibling in self._root_siblings: + element.addprevious(sibling) + del self._root_siblings[:] + else: + element = SubElement(element_stack[-1], el_name, + attrs, self._new_mappings) + element_stack.append(element) + + self._new_mappings.clear() + + def processingInstruction(self, target, data): + pi = ProcessingInstruction(target, data) + if self._root is None: + self._root_siblings.append(pi) + else: + self._element_stack[-1].append(pi) + + def endElementNS(self, ns_name, qname): + element = self._element_stack.pop() + el_tag = self._buildTag(ns_name) + if el_tag != element.tag: + raise SaxError("Unexpected element closed: " + el_tag) + + def startElement(self, name, attributes=None): + if attributes: + attributes = {(None, k): v for k, v in attributes.items()} + self.startElementNS((None, name), name, attributes) + + def endElement(self, name): + self.endElementNS((None, name), name) + + def characters(self, data): + last_element = self._element_stack[-1] + try: + # if there already is a child element, we must append to its tail + last_element = last_element[-1] + last_element.tail = (last_element.tail or '') + data + except IndexError: + # otherwise: append to the text + last_element.text = (last_element.text or '') + data + + ignorableWhitespace = characters + + +class ElementTreeProducer: + """Produces SAX events for an element and children. + """ + def __init__(self, element_or_tree, content_handler): + try: + element = element_or_tree.getroot() + except AttributeError: + element = element_or_tree + self._element = element + self._content_handler = content_handler + from xml.sax.xmlreader import AttributesNSImpl as attr_class + self._attr_class = attr_class + self._empty_attributes = attr_class({}, {}) + + def saxify(self): + self._content_handler.startDocument() + + element = self._element + if hasattr(element, 'getprevious'): + siblings = [] + sibling = element.getprevious() + while getattr(sibling, 'tag', None) is ProcessingInstruction: + siblings.append(sibling) + sibling = sibling.getprevious() + for sibling in siblings[::-1]: + self._recursive_saxify(sibling, {}) + + self._recursive_saxify(element, {}) + + if hasattr(element, 'getnext'): + sibling = element.getnext() + while getattr(sibling, 'tag', None) is ProcessingInstruction: + self._recursive_saxify(sibling, {}) + sibling = sibling.getnext() + + self._content_handler.endDocument() + + def _recursive_saxify(self, element, parent_nsmap): + content_handler = self._content_handler + tag = element.tag + if tag is Comment or tag is ProcessingInstruction: + if tag is ProcessingInstruction: + content_handler.processingInstruction( + element.target, element.text) + tail = element.tail + if tail: + content_handler.characters(tail) + return + + element_nsmap = element.nsmap + new_prefixes = [] + if element_nsmap != parent_nsmap: + # There have been updates to the namespace + for prefix, ns_uri in element_nsmap.items(): + if parent_nsmap.get(prefix) != ns_uri: + new_prefixes.append( (prefix, ns_uri) ) + + attribs = element.items() + if attribs: + attr_values = {} + attr_qnames = {} + for attr_ns_name, value in attribs: + attr_ns_tuple = _getNsTag(attr_ns_name) + attr_values[attr_ns_tuple] = value + attr_qnames[attr_ns_tuple] = self._build_qname( + attr_ns_tuple[0], attr_ns_tuple[1], element_nsmap, + preferred_prefix=None, is_attribute=True) + sax_attributes = self._attr_class(attr_values, attr_qnames) + else: + sax_attributes = self._empty_attributes + + ns_uri, local_name = _getNsTag(tag) + qname = self._build_qname( + ns_uri, local_name, element_nsmap, element.prefix, is_attribute=False) + + for prefix, uri in new_prefixes: + content_handler.startPrefixMapping(prefix, uri) + content_handler.startElementNS( + (ns_uri, local_name), qname, sax_attributes) + text = element.text + if text: + content_handler.characters(text) + for child in element: + self._recursive_saxify(child, element_nsmap) + content_handler.endElementNS((ns_uri, local_name), qname) + for prefix, uri in new_prefixes: + content_handler.endPrefixMapping(prefix) + tail = element.tail + if tail: + content_handler.characters(tail) + + def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute): + if ns_uri is None: + return local_name + + if not is_attribute and nsmap.get(preferred_prefix) == ns_uri: + prefix = preferred_prefix + else: + # Pick the first matching prefix, in alphabetical order. + candidates = [ + pfx for (pfx, uri) in nsmap.items() + if pfx is not None and uri == ns_uri + ] + prefix = ( + candidates[0] if len(candidates) == 1 + else min(candidates) if candidates + else None + ) + + if prefix is None: + # Default namespace + return local_name + return prefix + ':' + local_name + + +def saxify(element_or_tree, content_handler): + """One-shot helper to generate SAX events from an XML tree and fire + them against a SAX ContentHandler. + """ + return ElementTreeProducer(element_or_tree, content_handler).saxify() diff --git a/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi b/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi new file mode 100644 index 00000000..dc03df9a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi @@ -0,0 +1,875 @@ +# SAX-like interfaces + +class XMLSyntaxAssertionError(XMLSyntaxError, AssertionError): + """ + An XMLSyntaxError that additionally inherits from AssertionError for + ElementTree / backwards compatibility reasons. + + This class may get replaced by a plain XMLSyntaxError in a future version. + """ + def __init__(self, message): + XMLSyntaxError.__init__(self, message, None, 0, 1) + + +ctypedef enum _SaxParserEvents: + SAX_EVENT_START = 1 << 0 + SAX_EVENT_END = 1 << 1 + SAX_EVENT_DATA = 1 << 2 + SAX_EVENT_DOCTYPE = 1 << 3 + SAX_EVENT_PI = 1 << 4 + SAX_EVENT_COMMENT = 1 << 5 + SAX_EVENT_START_NS = 1 << 6 + SAX_EVENT_END_NS = 1 << 7 + +ctypedef enum _ParseEventFilter: + PARSE_EVENT_FILTER_START = 1 << 0 + PARSE_EVENT_FILTER_END = 1 << 1 + PARSE_EVENT_FILTER_START_NS = 1 << 2 + PARSE_EVENT_FILTER_END_NS = 1 << 3 + PARSE_EVENT_FILTER_COMMENT = 1 << 4 + PARSE_EVENT_FILTER_PI = 1 << 5 + + +cdef int _buildParseEventFilter(events) except -1: + cdef int event_filter = 0 + for event in events: + if event == 'start': + event_filter |= PARSE_EVENT_FILTER_START + elif event == 'end': + event_filter |= PARSE_EVENT_FILTER_END + elif event == 'start-ns': + event_filter |= PARSE_EVENT_FILTER_START_NS + elif event == 'end-ns': + event_filter |= PARSE_EVENT_FILTER_END_NS + elif event == 'comment': + event_filter |= PARSE_EVENT_FILTER_COMMENT + elif event == 'pi': + event_filter |= PARSE_EVENT_FILTER_PI + else: + raise ValueError, f"invalid event name '{event}'" + return event_filter + + +cdef class _SaxParserTarget: + cdef int _sax_event_filter + + cdef _handleSaxStart(self, tag, attrib, nsmap): + return None + cdef _handleSaxEnd(self, tag): + return None + cdef int _handleSaxData(self, data) except -1: + return 0 + cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1: + return 0 + cdef _handleSaxPi(self, target, data): + return None + cdef _handleSaxComment(self, comment): + return None + cdef _handleSaxStartNs(self, prefix, uri): + return None + cdef _handleSaxEndNs(self, prefix): + return None + + +#@cython.final +@cython.internal +@cython.no_gc_clear # Required because parent class uses it - Cython bug. +cdef class _SaxParserContext(_ParserContext): + """This class maps SAX2 events to parser target events. + """ + cdef _SaxParserTarget _target + cdef _BaseParser _parser + cdef xmlparser.startElementNsSAX2Func _origSaxStart + cdef xmlparser.endElementNsSAX2Func _origSaxEnd + cdef xmlparser.startElementSAXFunc _origSaxStartNoNs + cdef xmlparser.endElementSAXFunc _origSaxEndNoNs + cdef xmlparser.charactersSAXFunc _origSaxData + cdef xmlparser.cdataBlockSAXFunc _origSaxCData + cdef xmlparser.internalSubsetSAXFunc _origSaxDoctype + cdef xmlparser.commentSAXFunc _origSaxComment + cdef xmlparser.processingInstructionSAXFunc _origSaxPI + cdef xmlparser.startDocumentSAXFunc _origSaxStartDocument + + # for event collecting + cdef int _event_filter + cdef list _ns_stack + cdef list _node_stack + cdef _ParseEventsIterator events_iterator + + # for iterparse + cdef _Element _root + cdef _MultiTagMatcher _matcher + + def __cinit__(self, _BaseParser parser): + self._ns_stack = [] + self._node_stack = [] + self._parser = parser + self.events_iterator = _ParseEventsIterator() + + cdef void _setSaxParserTarget(self, _SaxParserTarget target) noexcept: + self._target = target + + cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept: + _ParserContext._initParserContext(self, c_ctxt) + if self._target is not None: + self._connectTarget(c_ctxt) + elif self._event_filter: + self._connectEvents(c_ctxt) + + cdef void _connectTarget(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept: + """Wrap original SAX2 callbacks to call into parser target. + """ + sax = c_ctxt.sax + self._origSaxStart = sax.startElementNs = NULL + self._origSaxStartNoNs = sax.startElement = NULL + if self._target._sax_event_filter & (SAX_EVENT_START | + SAX_EVENT_START_NS | + SAX_EVENT_END_NS): + # intercept => overwrite orig callback + # FIXME: also intercept on when collecting END events + if sax.initialized == xmlparser.XML_SAX2_MAGIC: + sax.startElementNs = _handleSaxTargetStart + if self._target._sax_event_filter & SAX_EVENT_START: + sax.startElement = _handleSaxTargetStartNoNs + + self._origSaxEnd = sax.endElementNs = NULL + self._origSaxEndNoNs = sax.endElement = NULL + if self._target._sax_event_filter & (SAX_EVENT_END | + SAX_EVENT_END_NS): + if sax.initialized == xmlparser.XML_SAX2_MAGIC: + sax.endElementNs = _handleSaxEnd + if self._target._sax_event_filter & SAX_EVENT_END: + sax.endElement = _handleSaxEndNoNs + + self._origSaxData = sax.characters = sax.cdataBlock = NULL + if self._target._sax_event_filter & SAX_EVENT_DATA: + sax.characters = sax.cdataBlock = _handleSaxData + + # doctype propagation is always required for entity replacement + self._origSaxDoctype = sax.internalSubset + if self._target._sax_event_filter & SAX_EVENT_DOCTYPE: + sax.internalSubset = _handleSaxTargetDoctype + + self._origSaxPI = sax.processingInstruction = NULL + if self._target._sax_event_filter & SAX_EVENT_PI: + sax.processingInstruction = _handleSaxTargetPI + + self._origSaxComment = sax.comment = NULL + if self._target._sax_event_filter & SAX_EVENT_COMMENT: + sax.comment = _handleSaxTargetComment + + # enforce entity replacement + sax.reference = NULL + c_ctxt.replaceEntities = 1 + + cdef void _connectEvents(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept: + """Wrap original SAX2 callbacks to collect parse events without parser target. + """ + sax = c_ctxt.sax + self._origSaxStartDocument = sax.startDocument + sax.startDocument = _handleSaxStartDocument + + # only override "start" event handler if needed + self._origSaxStart = sax.startElementNs + if self._event_filter == 0 or c_ctxt.html or \ + self._event_filter & (PARSE_EVENT_FILTER_START | + PARSE_EVENT_FILTER_END | + PARSE_EVENT_FILTER_START_NS | + PARSE_EVENT_FILTER_END_NS): + sax.startElementNs = <xmlparser.startElementNsSAX2Func>_handleSaxStart + + self._origSaxStartNoNs = sax.startElement + if self._event_filter == 0 or c_ctxt.html or \ + self._event_filter & (PARSE_EVENT_FILTER_START | + PARSE_EVENT_FILTER_END): + sax.startElement = <xmlparser.startElementSAXFunc>_handleSaxStartNoNs + + # only override "end" event handler if needed + self._origSaxEnd = sax.endElementNs + if self._event_filter == 0 or \ + self._event_filter & (PARSE_EVENT_FILTER_END | + PARSE_EVENT_FILTER_END_NS): + sax.endElementNs = <xmlparser.endElementNsSAX2Func>_handleSaxEnd + + self._origSaxEndNoNs = sax.endElement + if self._event_filter == 0 or \ + self._event_filter & PARSE_EVENT_FILTER_END: + sax.endElement = <xmlparser.endElementSAXFunc>_handleSaxEndNoNs + + self._origSaxComment = sax.comment + if self._event_filter & PARSE_EVENT_FILTER_COMMENT: + sax.comment = <xmlparser.commentSAXFunc>_handleSaxComment + + self._origSaxPI = sax.processingInstruction + if self._event_filter & PARSE_EVENT_FILTER_PI: + sax.processingInstruction = <xmlparser.processingInstructionSAXFunc>_handleSaxPIEvent + + cdef _setEventFilter(self, events, tag): + self._event_filter = _buildParseEventFilter(events) + if not self._event_filter or tag is None or tag == '*': + self._matcher = None + else: + self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag) + + cdef int startDocument(self, xmlDoc* c_doc) except -1: + try: + self._doc = _documentFactory(c_doc, self._parser) + finally: + self._parser = None # clear circular reference ASAP + if self._matcher is not None: + self._matcher.cacheTags(self._doc, True) # force entry in libxml2 dict + return 0 + + cdef int pushEvent(self, event, xmlNode* c_node) except -1: + cdef _Element root + if self._root is None: + root = self._doc.getroot() + if root is not None and root._c_node.type == tree.XML_ELEMENT_NODE: + self._root = root + node = _elementFactory(self._doc, c_node) + self.events_iterator._events.append( (event, node) ) + return 0 + + cdef int flushEvents(self) except -1: + events = self.events_iterator._events + while self._node_stack: + events.append( ('end', self._node_stack.pop()) ) + _pushSaxNsEndEvents(self) + while self._ns_stack: + _pushSaxNsEndEvents(self) + + cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept: + if c_ctxt.errNo == xmlerror.XML_ERR_OK: + c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR + # stop parsing immediately + c_ctxt.wellFormed = 0 + c_ctxt.disableSAX = 1 + c_ctxt.instate = xmlparser.XML_PARSER_EOF + self._store_raised() + + +@cython.final +@cython.internal +cdef class _ParseEventsIterator: + """A reusable parse events iterator""" + cdef list _events + cdef int _event_index + + def __cinit__(self): + self._events = [] + self._event_index = 0 + + def __iter__(self): + return self + + def __next__(self): + cdef int event_index = self._event_index + events = self._events + if event_index >= 2**10 or event_index * 2 >= len(events): + if event_index: + # clean up from time to time + del events[:event_index] + self._event_index = event_index = 0 + if event_index >= len(events): + raise StopIteration + item = events[event_index] + self._event_index = event_index + 1 + return item + + +cdef list _build_prefix_uri_list(_SaxParserContext context, int c_nb_namespaces, + const_xmlChar** c_namespaces): + "Build [(prefix, uri)] list of declared namespaces." + cdef int i + namespaces = [] + for i in xrange(c_nb_namespaces): + namespaces.append((funicodeOrEmpty(c_namespaces[0]), funicode(c_namespaces[1]))) + c_namespaces += 2 + return namespaces + + +cdef void _handleSaxStart( + void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix, + const_xmlChar* c_namespace, int c_nb_namespaces, + const_xmlChar** c_namespaces, + int c_nb_attributes, int c_nb_defaulted, + const_xmlChar** c_attributes) noexcept with gil: + cdef int i + cdef size_t c_len + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + cdef int event_filter = context._event_filter + try: + if (c_nb_namespaces and + event_filter & (PARSE_EVENT_FILTER_START_NS | + PARSE_EVENT_FILTER_END_NS)): + declared_namespaces = _build_prefix_uri_list( + context, c_nb_namespaces, c_namespaces) + if event_filter & PARSE_EVENT_FILTER_START_NS: + for prefix_uri_tuple in declared_namespaces: + context.events_iterator._events.append(("start-ns", prefix_uri_tuple)) + else: + declared_namespaces = None + + context._origSaxStart(c_ctxt, c_localname, c_prefix, c_namespace, + c_nb_namespaces, c_namespaces, c_nb_attributes, + c_nb_defaulted, c_attributes) + if c_ctxt.html: + _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node) + # The HTML parser in libxml2 reports the missing opening tags when it finds + # misplaced ones, but with tag names from C string constants that ignore the + # parser dict. Thus, we need to intern the name ourselves. + c_localname = tree.xmlDictLookup(c_ctxt.dict, c_localname, -1) + if c_localname is NULL: + raise MemoryError() + + if event_filter & PARSE_EVENT_FILTER_END_NS: + context._ns_stack.append(declared_namespaces) + if event_filter & (PARSE_EVENT_FILTER_END | + PARSE_EVENT_FILTER_START): + _pushSaxStartEvent(context, c_ctxt, c_namespace, c_localname, None) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxTargetStart( + void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix, + const_xmlChar* c_namespace, int c_nb_namespaces, + const_xmlChar** c_namespaces, + int c_nb_attributes, int c_nb_defaulted, + const_xmlChar** c_attributes) noexcept with gil: + cdef int i + cdef size_t c_len + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + + cdef int event_filter = context._event_filter + cdef int sax_event_filter = context._target._sax_event_filter + try: + if c_nb_namespaces: + declared_namespaces = _build_prefix_uri_list( + context, c_nb_namespaces, c_namespaces) + + if event_filter & PARSE_EVENT_FILTER_START_NS: + for prefix_uri_tuple in declared_namespaces: + context.events_iterator._events.append(("start-ns", prefix_uri_tuple)) + + if sax_event_filter & SAX_EVENT_START_NS: + for prefix, uri in declared_namespaces: + context._target._handleSaxStartNs(prefix, uri) + else: + declared_namespaces = None + + if sax_event_filter & SAX_EVENT_START: + if c_nb_defaulted > 0: + # only add default attributes if we asked for them + if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0: + c_nb_attributes -= c_nb_defaulted + if c_nb_attributes == 0: + attrib = IMMUTABLE_EMPTY_MAPPING + else: + attrib = {} + for i in xrange(c_nb_attributes): + name = _namespacedNameFromNsName( + c_attributes[2], c_attributes[0]) + if c_attributes[3] is NULL: + value = '' + else: + c_len = c_attributes[4] - c_attributes[3] + value = c_attributes[3][:c_len].decode('utf8') + attrib[name] = value + c_attributes += 5 + + nsmap = dict(declared_namespaces) if c_nb_namespaces else IMMUTABLE_EMPTY_MAPPING + + element = _callTargetSaxStart( + context, c_ctxt, + _namespacedNameFromNsName(c_namespace, c_localname), + attrib, nsmap) + else: + element = None + + if (event_filter & PARSE_EVENT_FILTER_END_NS or + sax_event_filter & SAX_EVENT_END_NS): + context._ns_stack.append(declared_namespaces) + if event_filter & (PARSE_EVENT_FILTER_END | + PARSE_EVENT_FILTER_START): + _pushSaxStartEvent(context, c_ctxt, c_namespace, + c_localname, element) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name, + const_xmlChar** c_attributes) noexcept with gil: + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + context._origSaxStartNoNs(c_ctxt, c_name, c_attributes) + if c_ctxt.html: + _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node) + # The HTML parser in libxml2 reports the missing opening tags when it finds + # misplaced ones, but with tag names from C string constants that ignore the + # parser dict. Thus, we need to intern the name ourselves. + c_name = tree.xmlDictLookup(c_ctxt.dict, c_name, -1) + if c_name is NULL: + raise MemoryError() + if context._event_filter & (PARSE_EVENT_FILTER_END | + PARSE_EVENT_FILTER_START): + _pushSaxStartEvent(context, c_ctxt, NULL, c_name, None) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxTargetStartNoNs(void* ctxt, const_xmlChar* c_name, + const_xmlChar** c_attributes) noexcept with gil: + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + if c_attributes is NULL: + attrib = IMMUTABLE_EMPTY_MAPPING + else: + attrib = {} + while c_attributes[0] is not NULL: + name = funicode(c_attributes[0]) + attrib[name] = funicodeOrEmpty(c_attributes[1]) + c_attributes += 2 + element = _callTargetSaxStart( + context, c_ctxt, funicode(c_name), + attrib, IMMUTABLE_EMPTY_MAPPING) + if context._event_filter & (PARSE_EVENT_FILTER_END | + PARSE_EVENT_FILTER_START): + _pushSaxStartEvent(context, c_ctxt, NULL, c_name, element) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef _callTargetSaxStart(_SaxParserContext context, + xmlparser.xmlParserCtxt* c_ctxt, + tag, attrib, nsmap): + element = context._target._handleSaxStart(tag, attrib, nsmap) + if element is not None and c_ctxt.input is not NULL: + if isinstance(element, _Element): + (<_Element>element)._c_node.line = ( + <unsigned short>c_ctxt.input.line + if c_ctxt.input.line < 65535 else 65535) + return element + + +cdef int _pushSaxStartEvent(_SaxParserContext context, + xmlparser.xmlParserCtxt* c_ctxt, + const_xmlChar* c_href, + const_xmlChar* c_name, node) except -1: + if (context._matcher is None or + context._matcher.matchesNsTag(c_href, c_name)): + if node is None and context._target is None: + assert context._doc is not None + node = _elementFactory(context._doc, c_ctxt.node) + if context._event_filter & PARSE_EVENT_FILTER_START: + context.events_iterator._events.append(('start', node)) + if (context._target is None and + context._event_filter & PARSE_EVENT_FILTER_END): + context._node_stack.append(node) + return 0 + + +cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname, + const_xmlChar* c_prefix, + const_xmlChar* c_namespace) noexcept with gil: + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + if context._target is not None: + if context._target._sax_event_filter & SAX_EVENT_END: + node = context._target._handleSaxEnd( + _namespacedNameFromNsName(c_namespace, c_localname)) + else: + node = None + else: + context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace) + node = None + _pushSaxEndEvent(context, c_namespace, c_localname, node) + _pushSaxNsEndEvents(context) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxEndNoNs(void* ctxt, const_xmlChar* c_name) noexcept with gil: + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + if context._target is not None: + node = context._target._handleSaxEnd(funicode(c_name)) + else: + context._origSaxEndNoNs(c_ctxt, c_name) + node = None + _pushSaxEndEvent(context, NULL, c_name, node) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef int _pushSaxNsEndEvents(_SaxParserContext context) except -1: + cdef bint build_events = context._event_filter & PARSE_EVENT_FILTER_END_NS + cdef bint call_target = ( + context._target is not None + and context._target._sax_event_filter & SAX_EVENT_END_NS) + if not build_events and not call_target: + return 0 + + cdef list declared_namespaces = context._ns_stack.pop() + if declared_namespaces is None: + return 0 + + cdef tuple prefix_uri + for prefix_uri in reversed(declared_namespaces): + if call_target: + context._target._handleSaxEndNs(prefix_uri[0]) + if build_events: + context.events_iterator._events.append(('end-ns', None)) + + return 0 + + +cdef int _pushSaxEndEvent(_SaxParserContext context, + const_xmlChar* c_href, + const_xmlChar* c_name, node) except -1: + if context._event_filter & PARSE_EVENT_FILTER_END: + if (context._matcher is None or + context._matcher.matchesNsTag(c_href, c_name)): + if context._target is None: + node = context._node_stack.pop() + context.events_iterator._events.append(('end', node)) + return 0 + + +cdef void _handleSaxData(void* ctxt, const_xmlChar* c_data, int data_len) noexcept with gil: + # can only be called if parsing with a target + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + context._target._handleSaxData( + c_data[:data_len].decode('utf8')) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name, + const_xmlChar* c_public, + const_xmlChar* c_system) noexcept with gil: + # can only be called if parsing with a target + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + context._target._handleSaxDoctype( + funicodeOrNone(c_name), + funicodeOrNone(c_public), + funicodeOrNone(c_system)) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxStartDocument(void* ctxt) noexcept with gil: + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + context._origSaxStartDocument(ctxt) + c_doc = c_ctxt.myDoc + try: + context.startDocument(c_doc) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxTargetPI(void* ctxt, const_xmlChar* c_target, + const_xmlChar* c_data) noexcept with gil: + # can only be called if parsing with a target + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + pi = context._target._handleSaxPi( + funicodeOrNone(c_target), + funicodeOrEmpty(c_data)) + if context._event_filter & PARSE_EVENT_FILTER_PI: + context.events_iterator._events.append(('pi', pi)) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target, + const_xmlChar* data) noexcept with gil: + # can only be called when collecting pi events + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + context._origSaxPI(ctxt, target, data) + c_node = _findLastEventNode(c_ctxt) + if c_node is NULL: + return + try: + context.pushEvent('pi', c_node) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) noexcept with gil: + # can only be called if parsing with a target + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + try: + comment = context._target._handleSaxComment(funicodeOrEmpty(c_data)) + if context._event_filter & PARSE_EVENT_FILTER_COMMENT: + context.events_iterator._events.append(('comment', comment)) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef void _handleSaxComment(void* ctxt, const_xmlChar* text) noexcept with gil: + # can only be called when collecting comment events + c_ctxt = <xmlparser.xmlParserCtxt*>ctxt + if c_ctxt._private is NULL or c_ctxt.disableSAX: + return + context = <_SaxParserContext>c_ctxt._private + context._origSaxComment(ctxt, text) + c_node = _findLastEventNode(c_ctxt) + if c_node is NULL: + return + try: + context.pushEvent('comment', c_node) + except: + context._handleSaxException(c_ctxt) + finally: + return # swallow any further exceptions + + +cdef inline xmlNode* _findLastEventNode(xmlparser.xmlParserCtxt* c_ctxt): + # this mimics what libxml2 creates for comments/PIs + if c_ctxt.inSubset == 1: + return c_ctxt.myDoc.intSubset.last + elif c_ctxt.inSubset == 2: + return c_ctxt.myDoc.extSubset.last + elif c_ctxt.node is NULL: + return c_ctxt.myDoc.last + elif c_ctxt.node.type == tree.XML_ELEMENT_NODE: + return c_ctxt.node.last + else: + return c_ctxt.node.next + + +############################################################ +## ET compatible XML tree builder +############################################################ + +cdef class TreeBuilder(_SaxParserTarget): + """TreeBuilder(self, element_factory=None, parser=None, + comment_factory=None, pi_factory=None, + insert_comments=True, insert_pis=True) + + Parser target that builds a tree from parse event callbacks. + + The factory arguments can be used to influence the creation of + elements, comments and processing instructions. + + By default, comments and processing instructions are inserted into + the tree, but they can be ignored by passing the respective flags. + + The final tree is returned by the ``close()`` method. + """ + cdef _BaseParser _parser + cdef object _factory + cdef object _comment_factory + cdef object _pi_factory + cdef list _data + cdef list _element_stack + cdef object _element_stack_pop + cdef _Element _last # may be None + cdef bint _in_tail + cdef bint _insert_comments + cdef bint _insert_pis + + def __init__(self, *, element_factory=None, parser=None, + comment_factory=None, pi_factory=None, + bint insert_comments=True, bint insert_pis=True): + self._sax_event_filter = \ + SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \ + SAX_EVENT_PI | SAX_EVENT_COMMENT + self._data = [] # data collector + self._element_stack = [] # element stack + self._element_stack_pop = self._element_stack.pop + self._last = None # last element + self._in_tail = 0 # true if we're after an end tag + self._factory = element_factory + self._comment_factory = comment_factory if comment_factory is not None else Comment + self._pi_factory = pi_factory if pi_factory is not None else ProcessingInstruction + self._insert_comments = insert_comments + self._insert_pis = insert_pis + self._parser = parser + + @cython.final + cdef int _flush(self) except -1: + if self._data: + if self._last is not None: + text = "".join(self._data) + if self._in_tail: + assert self._last.tail is None, "internal error (tail)" + self._last.tail = text + else: + assert self._last.text is None, "internal error (text)" + self._last.text = text + del self._data[:] + return 0 + + # internal SAX event handlers + + @cython.final + cdef _handleSaxStart(self, tag, attrib, nsmap): + self._flush() + if self._factory is not None: + self._last = self._factory(tag, attrib) + if self._element_stack: + _appendChild(self._element_stack[-1], self._last) + elif self._element_stack: + self._last = _makeSubElement( + self._element_stack[-1], tag, None, None, attrib, nsmap, None) + else: + self._last = _makeElement( + tag, NULL, None, self._parser, None, None, attrib, nsmap, None) + self._element_stack.append(self._last) + self._in_tail = 0 + return self._last + + @cython.final + cdef _handleSaxEnd(self, tag): + self._flush() + self._last = self._element_stack_pop() + self._in_tail = 1 + return self._last + + @cython.final + cdef int _handleSaxData(self, data) except -1: + self._data.append(data) + + @cython.final + cdef _handleSaxPi(self, target, data): + elem = self._pi_factory(target, data) + if self._insert_pis: + self._flush() + self._last = elem + if self._element_stack: + _appendChild(self._element_stack[-1], self._last) + self._in_tail = 1 + return self._last + + @cython.final + cdef _handleSaxComment(self, comment): + elem = self._comment_factory(comment) + if self._insert_comments: + self._flush() + self._last = elem + if self._element_stack: + _appendChild(self._element_stack[-1], self._last) + self._in_tail = 1 + return elem + + # Python level event handlers + + def close(self): + """close(self) + + Flushes the builder buffers, and returns the toplevel document + element. Raises XMLSyntaxError on inconsistencies. + """ + if self._element_stack: + raise XMLSyntaxAssertionError("missing end tags") + # TODO: this does not necessarily seem like an error case. Why not just return None? + if self._last is None: + raise XMLSyntaxAssertionError("missing toplevel element") + return self._last + + def data(self, data): + """data(self, data) + + Adds text to the current element. The value should be either an + 8-bit string containing ASCII text, or a Unicode string. + """ + self._handleSaxData(data) + + def start(self, tag, attrs, nsmap=None): + """start(self, tag, attrs, nsmap=None) + + Opens a new element. + """ + if nsmap is None: + nsmap = IMMUTABLE_EMPTY_MAPPING + return self._handleSaxStart(tag, attrs, nsmap) + + def end(self, tag): + """end(self, tag) + + Closes the current element. + """ + element = self._handleSaxEnd(tag) + assert self._last.tag == tag,\ + f"end tag mismatch (expected {self._last.tag}, got {tag})" + return element + + def pi(self, target, data=None): + """pi(self, target, data=None) + + Creates a processing instruction using the factory, appends it + (unless disabled) and returns it. + """ + return self._handleSaxPi(target, data) + + def comment(self, comment): + """comment(self, comment) + + Creates a comment using the factory, appends it (unless disabled) + and returns it. + """ + return self._handleSaxComment(comment) diff --git a/.venv/lib/python3.12/site-packages/lxml/schematron.pxi b/.venv/lib/python3.12/site-packages/lxml/schematron.pxi new file mode 100644 index 00000000..ea0881fd --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/schematron.pxi @@ -0,0 +1,168 @@ +# support for Schematron validation +from lxml.includes cimport schematron + + +cdef class SchematronError(LxmlError): + """Base class of all Schematron errors. + """ + +cdef class SchematronParseError(SchematronError): + """Error while parsing an XML document as Schematron schema. + """ + +cdef class SchematronValidateError(SchematronError): + """Error while validating an XML document with a Schematron schema. + """ + + +################################################################################ +# Schematron + +cdef class Schematron(_Validator): + """Schematron(self, etree=None, file=None) + A Schematron validator. + + Pass a root Element or an ElementTree to turn it into a validator. + Alternatively, pass a filename as keyword argument 'file' to parse from + the file system. + + Schematron is a less well known, but very powerful schema language. The main + idea is to use the capabilities of XPath to put restrictions on the structure + and the content of XML documents. Here is a simple example:: + + >>> schematron = Schematron(XML(''' + ... <schema xmlns="http://www.ascc.net/xml/schematron" > + ... <pattern name="id is the only permitted attribute name"> + ... <rule context="*"> + ... <report test="@*[not(name()='id')]">Attribute + ... <name path="@*[not(name()='id')]"/> is forbidden<name/> + ... </report> + ... </rule> + ... </pattern> + ... </schema> + ... ''')) + + >>> xml = XML(''' + ... <AAA name="aaa"> + ... <BBB id="bbb"/> + ... <CCC color="ccc"/> + ... </AAA> + ... ''') + + >>> schematron.validate(xml) + 0 + + >>> xml = XML(''' + ... <AAA id="aaa"> + ... <BBB id="bbb"/> + ... <CCC/> + ... </AAA> + ... ''') + + >>> schematron.validate(xml) + 1 + + Schematron was added to libxml2 in version 2.6.21. Before version 2.6.32, + however, Schematron lacked support for error reporting other than to stderr. + This version is therefore required to retrieve validation warnings and + errors in lxml. + """ + cdef schematron.xmlSchematron* _c_schema + cdef xmlDoc* _c_schema_doc + def __cinit__(self): + self._c_schema = NULL + self._c_schema_doc = NULL + + def __init__(self, etree=None, *, file=None): + cdef _Document doc + cdef _Element root_node + cdef xmlNode* c_node + cdef char* c_href + cdef schematron.xmlSchematronParserCtxt* parser_ctxt = NULL + _Validator.__init__(self) + if not config.ENABLE_SCHEMATRON: + raise SchematronError, \ + "lxml.etree was compiled without Schematron support." + if etree is not None: + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + self._c_schema_doc = _copyDocRoot(doc._c_doc, root_node._c_node) + parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_schema_doc) + elif file is not None: + filename = _getFilenameForFile(file) + if filename is None: + # XXX assume a string object + filename = file + filename = _encodeFilename(filename) + with self._error_log: + orig_loader = _register_document_loader() + parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename)) + _reset_document_loader(orig_loader) + else: + raise SchematronParseError, "No tree or file given" + + if parser_ctxt is NULL: + if self._c_schema_doc is not NULL: + tree.xmlFreeDoc(self._c_schema_doc) + self._c_schema_doc = NULL + raise MemoryError() + + try: + with self._error_log: + orig_loader = _register_document_loader() + self._c_schema = schematron.xmlSchematronParse(parser_ctxt) + _reset_document_loader(orig_loader) + finally: + schematron.xmlSchematronFreeParserCtxt(parser_ctxt) + + if self._c_schema is NULL: + raise SchematronParseError( + "Document is not a valid Schematron schema", + self._error_log) + + def __dealloc__(self): + schematron.xmlSchematronFree(self._c_schema) + if self._c_schema_doc is not NULL: + tree.xmlFreeDoc(self._c_schema_doc) + + def __call__(self, etree): + """__call__(self, etree) + + Validate doc using Schematron. + + Returns true if document is valid, false if not.""" + cdef _Document doc + cdef _Element root_node + cdef xmlDoc* c_doc + cdef schematron.xmlSchematronValidCtxt* valid_ctxt + cdef int ret + + assert self._c_schema is not NULL, "Schematron instance not initialised" + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + + valid_ctxt = schematron.xmlSchematronNewValidCtxt( + self._c_schema, schematron.XML_SCHEMATRON_OUT_ERROR) + if valid_ctxt is NULL: + raise MemoryError() + + try: + self._error_log.clear() + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + schematron.xmlSchematronSetValidStructuredErrors( + valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log) + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + with nogil: + ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc) + _destroyFakeDoc(doc._c_doc, c_doc) + finally: + schematron.xmlSchematronFreeValidCtxt(valid_ctxt) + + if ret == -1: + raise SchematronValidateError( + "Internal error in Schematron validation", + self._error_log) + if ret == 0: + return True + else: + return False diff --git a/.venv/lib/python3.12/site-packages/lxml/serializer.pxi b/.venv/lib/python3.12/site-packages/lxml/serializer.pxi new file mode 100644 index 00000000..f0de0f9f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/serializer.pxi @@ -0,0 +1,1781 @@ +# XML serialization and output functions + +cdef object GzipFile +from gzip import GzipFile + + +cdef class SerialisationError(LxmlError): + """A libxml2 error that occurred during serialisation. + """ + + +cdef enum _OutputMethods: + OUTPUT_METHOD_XML + OUTPUT_METHOD_HTML + OUTPUT_METHOD_TEXT + + +cdef int _findOutputMethod(method) except -1: + if method is None: + return OUTPUT_METHOD_XML + method = method.lower() + if method == "xml": + return OUTPUT_METHOD_XML + if method == "html": + return OUTPUT_METHOD_HTML + if method == "text": + return OUTPUT_METHOD_TEXT + raise ValueError(f"unknown output method {method!r}") + + +cdef _textToString(xmlNode* c_node, encoding, bint with_tail): + cdef bint needs_conversion + cdef const_xmlChar* c_text + cdef xmlNode* c_text_node + cdef tree.xmlBuffer* c_buffer + cdef int error_result + + c_buffer = tree.xmlBufferCreate() + if c_buffer is NULL: + raise MemoryError() + + with nogil: + error_result = tree.xmlNodeBufGetContent(c_buffer, c_node) + if with_tail: + c_text_node = _textNodeOrSkip(c_node.next) + while c_text_node is not NULL: + tree.xmlBufferWriteChar(c_buffer, <const_char*>c_text_node.content) + c_text_node = _textNodeOrSkip(c_text_node.next) + c_text = tree.xmlBufferContent(c_buffer) + + if error_result < 0 or c_text is NULL: + tree.xmlBufferFree(c_buffer) + raise SerialisationError, "Error during serialisation (out of memory?)" + + try: + needs_conversion = 0 + if encoding is unicode: + needs_conversion = 1 + elif encoding is not None: + # Python prefers lower case encoding names + encoding = encoding.lower() + if encoding not in ('utf8', 'utf-8'): + if encoding == 'ascii': + if isutf8l(c_text, tree.xmlBufferLength(c_buffer)): + # will raise a decode error below + needs_conversion = 1 + else: + needs_conversion = 1 + + if needs_conversion: + text = (<const_char*>c_text)[:tree.xmlBufferLength(c_buffer)].decode('utf8') + if encoding is not unicode: + encoding = _utf8(encoding) + text = python.PyUnicode_AsEncodedString( + text, encoding, 'strict') + else: + text = (<unsigned char*>c_text)[:tree.xmlBufferLength(c_buffer)] + finally: + tree.xmlBufferFree(c_buffer) + return text + + +cdef _tostring(_Element element, encoding, doctype, method, + bint write_xml_declaration, bint write_complete_document, + bint pretty_print, bint with_tail, int standalone): + """Serialize an element to an encoded string representation of its XML + tree. + """ + cdef tree.xmlOutputBuffer* c_buffer + cdef tree.xmlBuf* c_result_buffer + cdef tree.xmlCharEncodingHandler* enchandler + cdef const_char* c_enc + cdef const_xmlChar* c_version + cdef const_xmlChar* c_doctype + cdef int c_method + cdef int error_result + if element is None: + return None + _assertValidNode(element) + c_method = _findOutputMethod(method) + if c_method == OUTPUT_METHOD_TEXT: + return _textToString(element._c_node, encoding, with_tail) + if encoding is None or encoding is unicode: + c_enc = NULL + else: + encoding = _utf8(encoding) + c_enc = _cstr(encoding) + if doctype is None: + c_doctype = NULL + else: + doctype = _utf8(doctype) + c_doctype = _xcstr(doctype) + # it is necessary to *and* find the encoding handler *and* use + # encoding during output + enchandler = tree.xmlFindCharEncodingHandler(c_enc) + if enchandler is NULL and c_enc is not NULL: + if encoding is not None: + encoding = encoding.decode('UTF-8') + raise LookupError, f"unknown encoding: '{encoding}'" + c_buffer = tree.xmlAllocOutputBuffer(enchandler) + if c_buffer is NULL: + tree.xmlCharEncCloseFunc(enchandler) + raise MemoryError() + + with nogil: + _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_doctype, c_method, + write_xml_declaration, write_complete_document, + pretty_print, with_tail, standalone) + tree.xmlOutputBufferFlush(c_buffer) + if c_buffer.conv is not NULL: + c_result_buffer = c_buffer.conv + else: + c_result_buffer = c_buffer.buffer + + error_result = c_buffer.error + if error_result != xmlerror.XML_ERR_OK: + tree.xmlOutputBufferClose(c_buffer) + _raiseSerialisationError(error_result) + + try: + if encoding is unicode: + result = (<unsigned char*>tree.xmlBufContent( + c_result_buffer))[:tree.xmlBufUse(c_result_buffer)].decode('UTF-8') + else: + result = <bytes>(<unsigned char*>tree.xmlBufContent( + c_result_buffer))[:tree.xmlBufUse(c_result_buffer)] + finally: + error_result = tree.xmlOutputBufferClose(c_buffer) + if error_result == -1: + _raiseSerialisationError(error_result) + return result + +cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments, inclusive_ns_prefixes): + cdef xmlDoc* c_doc + cdef xmlChar* c_buffer = NULL + cdef int byte_count = -1 + cdef bytes result + cdef _Document doc + cdef _Element element + cdef xmlChar **c_inclusive_ns_prefixes + + if isinstance(element_or_tree, _Element): + _assertValidNode(<_Element>element_or_tree) + doc = (<_Element>element_or_tree)._doc + c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0) + else: + doc = _documentOrRaise(element_or_tree) + _assertValidDoc(doc) + c_doc = doc._c_doc + + c_inclusive_ns_prefixes = _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes) if inclusive_ns_prefixes else NULL + try: + with nogil: + byte_count = c14n.xmlC14NDocDumpMemory( + c_doc, NULL, exclusive, c_inclusive_ns_prefixes, with_comments, &c_buffer) + + finally: + _destroyFakeDoc(doc._c_doc, c_doc) + if c_inclusive_ns_prefixes is not NULL: + python.lxml_free(c_inclusive_ns_prefixes) + + if byte_count < 0 or c_buffer is NULL: + if c_buffer is not NULL: + tree.xmlFree(c_buffer) + raise C14NError, "C14N failed" + try: + result = c_buffer[:byte_count] + finally: + tree.xmlFree(c_buffer) + return result + +cdef _raiseSerialisationError(int error_result): + if error_result == xmlerror.XML_ERR_NO_MEMORY: + raise MemoryError() + message = ErrorTypes._getName(error_result) + if message is None: + message = f"unknown error {error_result}" + raise SerialisationError, message + +############################################################ +# low-level serialisation functions + +cdef void _writeDoctype(tree.xmlOutputBuffer* c_buffer, + const_xmlChar* c_doctype) noexcept nogil: + tree.xmlOutputBufferWrite(c_buffer, tree.xmlStrlen(c_doctype), + <const_char*>c_doctype) + tree.xmlOutputBufferWriteString(c_buffer, "\n") + +cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer, + xmlNode* c_node, const_char* encoding, const_xmlChar* c_doctype, + int c_method, bint write_xml_declaration, + bint write_complete_document, + bint pretty_print, bint with_tail, + int standalone) noexcept nogil: + cdef xmlNode* c_nsdecl_node + cdef xmlDoc* c_doc = c_node.doc + if write_xml_declaration and c_method == OUTPUT_METHOD_XML: + _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone) + + # comments/processing instructions before doctype declaration + if write_complete_document and not c_buffer.error and c_doc.intSubset: + _writePrevSiblings(c_buffer, <xmlNode*>c_doc.intSubset, encoding, pretty_print) + + if c_doctype: + _writeDoctype(c_buffer, c_doctype) + # write internal DTD subset, preceding PIs/comments, etc. + if write_complete_document and not c_buffer.error: + if c_doctype is NULL: + _writeDtdToBuffer(c_buffer, c_doc, c_node.name, c_method, encoding) + _writePrevSiblings(c_buffer, c_node, encoding, pretty_print) + + c_nsdecl_node = c_node + if not c_node.parent or c_node.parent.type != tree.XML_DOCUMENT_NODE: + # copy the node and add namespaces from parents + # this is required to make libxml write them + c_nsdecl_node = tree.xmlCopyNode(c_node, 2) + if not c_nsdecl_node: + c_buffer.error = xmlerror.XML_ERR_NO_MEMORY + return + _copyParentNamespaces(c_node, c_nsdecl_node) + + c_nsdecl_node.parent = c_node.parent + c_nsdecl_node.children = c_node.children + c_nsdecl_node.last = c_node.last + + # write node + if c_method == OUTPUT_METHOD_HTML: + tree.htmlNodeDumpFormatOutput( + c_buffer, c_doc, c_nsdecl_node, encoding, pretty_print) + else: + tree.xmlNodeDumpOutput( + c_buffer, c_doc, c_nsdecl_node, 0, pretty_print, encoding) + + if c_nsdecl_node is not c_node: + # clean up + c_nsdecl_node.children = c_nsdecl_node.last = NULL + tree.xmlFreeNode(c_nsdecl_node) + + if c_buffer.error: + return + + # write tail, trailing comments, etc. + if with_tail: + _writeTail(c_buffer, c_node, encoding, c_method, pretty_print) + if write_complete_document: + _writeNextSiblings(c_buffer, c_node, encoding, pretty_print) + if pretty_print: + tree.xmlOutputBufferWrite(c_buffer, 1, "\n") + +cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer, + const_xmlChar* version, const_char* encoding, + int standalone) noexcept nogil: + if version is NULL: + version = <unsigned char*>"1.0" + tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='") + tree.xmlOutputBufferWriteString(c_buffer, <const_char*>version) + tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='") + tree.xmlOutputBufferWriteString(c_buffer, encoding) + if standalone == 0: + tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n") + elif standalone == 1: + tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n") + else: + tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n") + +cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer, + xmlDoc* c_doc, const_xmlChar* c_root_name, + int c_method, const_char* encoding) noexcept nogil: + cdef tree.xmlDtd* c_dtd + cdef xmlNode* c_node + cdef char* quotechar + c_dtd = c_doc.intSubset + if not c_dtd or not c_dtd.name: + return + + # Name in document type declaration must match the root element tag. + # For XML, case sensitive match, for HTML insensitive. + if c_method == OUTPUT_METHOD_HTML: + if tree.xmlStrcasecmp(c_root_name, c_dtd.name) != 0: + return + else: + if tree.xmlStrcmp(c_root_name, c_dtd.name) != 0: + return + + tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ") + tree.xmlOutputBufferWriteString(c_buffer, <const_char*>c_dtd.name) + + cdef const_xmlChar* public_id = c_dtd.ExternalID + cdef const_xmlChar* sys_url = c_dtd.SystemID + if public_id and public_id[0] == b'\0': + public_id = NULL + if sys_url and sys_url[0] == b'\0': + sys_url = NULL + + if public_id: + tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "') + tree.xmlOutputBufferWriteString(c_buffer, <const_char*>public_id) + if sys_url: + tree.xmlOutputBufferWrite(c_buffer, 2, '" ') + else: + tree.xmlOutputBufferWrite(c_buffer, 1, '"') + elif sys_url: + tree.xmlOutputBufferWrite(c_buffer, 8, ' SYSTEM ') + + if sys_url: + if tree.xmlStrchr(sys_url, b'"'): + quotechar = '\'' + else: + quotechar = '"' + tree.xmlOutputBufferWrite(c_buffer, 1, quotechar) + tree.xmlOutputBufferWriteString(c_buffer, <const_char*>sys_url) + tree.xmlOutputBufferWrite(c_buffer, 1, quotechar) + + if (not c_dtd.entities and not c_dtd.elements and + not c_dtd.attributes and not c_dtd.notations and + not c_dtd.pentities): + tree.xmlOutputBufferWrite(c_buffer, 2, '>\n') + return + + tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n') + if c_dtd.notations and not c_buffer.error: + c_buf = tree.xmlBufferCreate() + if not c_buf: + c_buffer.error = xmlerror.XML_ERR_NO_MEMORY + return + tree.xmlDumpNotationTable(c_buf, <tree.xmlNotationTable*>c_dtd.notations) + tree.xmlOutputBufferWrite( + c_buffer, tree.xmlBufferLength(c_buf), + <const_char*>tree.xmlBufferContent(c_buf)) + tree.xmlBufferFree(c_buf) + c_node = c_dtd.children + while c_node and not c_buffer.error: + tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding) + c_node = c_node.next + tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n") + +cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, + const_char* encoding, int c_method, bint pretty_print) noexcept nogil: + "Write the element tail." + c_node = c_node.next + while c_node and not c_buffer.error and c_node.type in ( + tree.XML_TEXT_NODE, tree.XML_CDATA_SECTION_NODE): + if c_method == OUTPUT_METHOD_HTML: + tree.htmlNodeDumpFormatOutput( + c_buffer, c_node.doc, c_node, encoding, pretty_print) + else: + tree.xmlNodeDumpOutput( + c_buffer, c_node.doc, c_node, 0, pretty_print, encoding) + c_node = c_node.next + +cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, + const_char* encoding, bint pretty_print) noexcept nogil: + cdef xmlNode* c_sibling + if c_node.parent and _isElement(c_node.parent): + return + # we are at a root node, so add PI and comment siblings + c_sibling = c_node + while c_sibling.prev and \ + (c_sibling.prev.type == tree.XML_PI_NODE or + c_sibling.prev.type == tree.XML_COMMENT_NODE): + c_sibling = c_sibling.prev + while c_sibling is not c_node and not c_buffer.error: + tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0, + pretty_print, encoding) + if pretty_print: + tree.xmlOutputBufferWriteString(c_buffer, "\n") + c_sibling = c_sibling.next + +cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node, + const_char* encoding, bint pretty_print) noexcept nogil: + cdef xmlNode* c_sibling + if c_node.parent and _isElement(c_node.parent): + return + # we are at a root node, so add PI and comment siblings + c_sibling = c_node.next + while not c_buffer.error and c_sibling and \ + (c_sibling.type == tree.XML_PI_NODE or + c_sibling.type == tree.XML_COMMENT_NODE): + if pretty_print: + tree.xmlOutputBufferWriteString(c_buffer, "\n") + tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0, + pretty_print, encoding) + c_sibling = c_sibling.next + + +# copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent()) +cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string): + cdef const char *base + cdef const char *cur + + if string == NULL: + return + + base = cur = <const char*>string + while cur[0] != 0: + if cur[0] == b'\n': + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + tree.xmlOutputBufferWrite(buf, 5, " ") + cur += 1 + base = cur + + elif cur[0] == b'\r': + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + tree.xmlOutputBufferWrite(buf, 5, " ") + cur += 1 + base = cur + + elif cur[0] == b'\t': + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + tree.xmlOutputBufferWrite(buf, 4, "	") + cur += 1 + base = cur + + elif cur[0] == b'"': + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + tree.xmlOutputBufferWrite(buf, 6, """) + cur += 1 + base = cur + + elif cur[0] == b'<': + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + tree.xmlOutputBufferWrite(buf, 4, "<") + cur += 1 + base = cur + + elif cur[0] == b'>': + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + tree.xmlOutputBufferWrite(buf, 4, ">") + cur += 1 + base = cur + elif cur[0] == b'&': + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + tree.xmlOutputBufferWrite(buf, 5, "&") + cur += 1 + base = cur + + else: + # Leave further encoding and escaping to the buffer encoder. + cur += 1 + + if base != cur: + tree.xmlOutputBufferWrite(buf, cur - base, base) + + +############################################################ +# output to file-like objects + +cdef object io_open +from io import open as io_open + +cdef object gzip +import gzip + +cdef object getwriter +from codecs import getwriter +cdef object utf8_writer = getwriter('utf8') + +cdef object contextmanager +from contextlib import contextmanager + +cdef object _open_utf8_file + +@contextmanager +def _open_utf8_file(file, compression=0): + file = _getFSPathOrObject(file) + if _isString(file): + if compression: + with gzip.GzipFile(file, mode='wb', compresslevel=compression) as zf: + yield utf8_writer(zf) + else: + with io_open(file, 'w', encoding='utf8') as f: + yield f + else: + if compression: + with gzip.GzipFile(fileobj=file, mode='wb', compresslevel=compression) as zf: + yield utf8_writer(zf) + else: + yield utf8_writer(file) + + +@cython.final +@cython.internal +cdef class _FilelikeWriter: + cdef object _filelike + cdef object _close_filelike + cdef _ExceptionContext _exc_context + cdef _ErrorLog error_log + def __cinit__(self, filelike, exc_context=None, compression=None, close=False): + if compression is not None and compression > 0: + filelike = GzipFile( + fileobj=filelike, mode='wb', compresslevel=compression) + self._close_filelike = filelike.close + elif close: + self._close_filelike = filelike.close + self._filelike = filelike + if exc_context is None: + self._exc_context = _ExceptionContext() + else: + self._exc_context = exc_context + self.error_log = _ErrorLog() + + cdef tree.xmlOutputBuffer* _createOutputBuffer( + self, tree.xmlCharEncodingHandler* enchandler) except NULL: + cdef tree.xmlOutputBuffer* c_buffer + c_buffer = tree.xmlOutputBufferCreateIO( + <tree.xmlOutputWriteCallback>_writeFilelikeWriter, _closeFilelikeWriter, + <python.PyObject*>self, enchandler) + if c_buffer is NULL: + raise IOError, "Could not create I/O writer context." + return c_buffer + + cdef int write(self, char* c_buffer, int size) noexcept: + try: + if self._filelike is None: + raise IOError, "File is already closed" + py_buffer = <bytes>c_buffer[:size] + self._filelike.write(py_buffer) + except: + size = -1 + self._exc_context._store_raised() + finally: + return size # and swallow any further exceptions + + cdef int close(self) noexcept: + retval = 0 + try: + if self._close_filelike is not None: + self._close_filelike() + # we should not close the file here as we didn't open it + self._filelike = None + except: + retval = -1 + self._exc_context._store_raised() + finally: + return retval # and swallow any further exceptions + +cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int length) noexcept: + return (<_FilelikeWriter>ctxt).write(c_buffer, length) + +cdef int _closeFilelikeWriter(void* ctxt) noexcept: + return (<_FilelikeWriter>ctxt).close() + +cdef _tofilelike(f, _Element element, encoding, doctype, method, + bint write_xml_declaration, bint write_doctype, + bint pretty_print, bint with_tail, int standalone, + int compression): + cdef _FilelikeWriter writer = None + cdef tree.xmlOutputBuffer* c_buffer + cdef tree.xmlCharEncodingHandler* enchandler + cdef const_char* c_enc + cdef const_xmlChar* c_doctype + cdef int error_result + + c_method = _findOutputMethod(method) + if c_method == OUTPUT_METHOD_TEXT: + data = _textToString(element._c_node, encoding, with_tail) + if compression: + bytes_out = BytesIO() + with GzipFile(fileobj=bytes_out, mode='wb', compresslevel=compression) as gzip_file: + gzip_file.write(data) + data = bytes_out.getvalue() + f = _getFSPathOrObject(f) + if _isString(f): + filename8 = _encodeFilename(f) + with open(filename8, 'wb') as f: + f.write(data) + else: + f.write(data) + return + + if encoding is None: + c_enc = NULL + else: + encoding = _utf8(encoding) + c_enc = _cstr(encoding) + if doctype is None: + c_doctype = NULL + else: + doctype = _utf8(doctype) + c_doctype = _xcstr(doctype) + + writer = _create_output_buffer(f, c_enc, compression, &c_buffer, close=False) + if writer is None: + with nogil: + error_result = _serialise_node( + c_buffer, c_doctype, c_enc, element._c_node, c_method, + write_xml_declaration, write_doctype, pretty_print, with_tail, standalone) + else: + error_result = _serialise_node( + c_buffer, c_doctype, c_enc, element._c_node, c_method, + write_xml_declaration, write_doctype, pretty_print, with_tail, standalone) + + if writer is not None: + writer._exc_context._raise_if_stored() + if error_result != xmlerror.XML_ERR_OK: + _raiseSerialisationError(error_result) + + +cdef int _serialise_node(tree.xmlOutputBuffer* c_buffer, const_xmlChar* c_doctype, + const_char* c_enc, xmlNode* c_node, int c_method, + bint write_xml_declaration, bint write_doctype, bint pretty_print, + bint with_tail, int standalone) noexcept nogil: + _writeNodeToBuffer( + c_buffer, c_node, c_enc, c_doctype, c_method, + write_xml_declaration, write_doctype, pretty_print, with_tail, standalone) + error_result = c_buffer.error + if error_result == xmlerror.XML_ERR_OK: + error_result = tree.xmlOutputBufferClose(c_buffer) + if error_result != -1: + error_result = xmlerror.XML_ERR_OK + else: + tree.xmlOutputBufferClose(c_buffer) + return error_result + + +cdef _FilelikeWriter _create_output_buffer( + f, const_char* c_enc, int c_compression, + tree.xmlOutputBuffer** c_buffer_ret, bint close): + cdef tree.xmlOutputBuffer* c_buffer + cdef _FilelikeWriter writer + cdef bytes filename8 + enchandler = tree.xmlFindCharEncodingHandler(c_enc) + if enchandler is NULL: + raise LookupError( + f"unknown encoding: '{c_enc.decode('UTF-8') if c_enc is not NULL else u''}'") + try: + f = _getFSPathOrObject(f) + if _isString(f): + filename8 = _encodeFilename(f) + if b'%' in filename8 and ( + # Exclude absolute Windows paths and file:// URLs. + _isFilePath(<const xmlChar*>filename8) not in (NO_FILE_PATH, ABS_WIN_FILE_PATH) + or filename8[:7].lower() == b'file://'): + # A file path (not a URL) containing the '%' URL escape character. + # libxml2 uses URL-unescaping on these, so escape the path before passing it in. + filename8 = filename8.replace(b'%', b'%25') + c_buffer = tree.xmlOutputBufferCreateFilename( + _cstr(filename8), enchandler, c_compression) + if c_buffer is NULL: + python.PyErr_SetFromErrno(IOError) # raises IOError + writer = None + elif hasattr(f, 'write'): + writer = _FilelikeWriter(f, compression=c_compression, close=close) + c_buffer = writer._createOutputBuffer(enchandler) + else: + raise TypeError( + f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'") + except: + tree.xmlCharEncCloseFunc(enchandler) + raise + c_buffer_ret[0] = c_buffer + return writer + +cdef xmlChar **_convert_ns_prefixes(tree.xmlDict* c_dict, ns_prefixes) except NULL: + cdef size_t i, num_ns_prefixes = len(ns_prefixes) + # Need to allocate one extra memory block to handle last NULL entry + c_ns_prefixes = <xmlChar **>python.lxml_malloc(num_ns_prefixes + 1, sizeof(xmlChar*)) + if not c_ns_prefixes: + raise MemoryError() + i = 0 + try: + for prefix in ns_prefixes: + prefix_utf = _utf8(prefix) + c_prefix = tree.xmlDictExists(c_dict, _xcstr(prefix_utf), len(prefix_utf)) + if c_prefix: + # unknown prefixes do not need to get serialised + c_ns_prefixes[i] = <xmlChar*>c_prefix + i += 1 + except: + python.lxml_free(c_ns_prefixes) + raise + + c_ns_prefixes[i] = NULL # append end marker + return c_ns_prefixes + +cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments, + int compression, inclusive_ns_prefixes): + cdef _FilelikeWriter writer = None + cdef tree.xmlOutputBuffer* c_buffer + cdef xmlChar **c_inclusive_ns_prefixes = NULL + cdef char* c_filename + cdef xmlDoc* c_base_doc + cdef xmlDoc* c_doc + cdef int bytes_count, error = 0 + + c_base_doc = element._c_node.doc + c_doc = _fakeRootDoc(c_base_doc, element._c_node) + try: + c_inclusive_ns_prefixes = ( + _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes) + if inclusive_ns_prefixes else NULL) + + f = _getFSPathOrObject(f) + if _isString(f): + filename8 = _encodeFilename(f) + c_filename = _cstr(filename8) + with nogil: + error = c14n.xmlC14NDocSave( + c_doc, NULL, exclusive, c_inclusive_ns_prefixes, + with_comments, c_filename, compression) + elif hasattr(f, 'write'): + writer = _FilelikeWriter(f, compression=compression) + c_buffer = writer._createOutputBuffer(NULL) + try: + with writer.error_log: + bytes_count = c14n.xmlC14NDocSaveTo( + c_doc, NULL, exclusive, c_inclusive_ns_prefixes, + with_comments, c_buffer) + finally: + error = tree.xmlOutputBufferClose(c_buffer) + if bytes_count < 0: + error = bytes_count + elif error != -1: + error = xmlerror.XML_ERR_OK + else: + raise TypeError(f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'") + finally: + _destroyFakeDoc(c_base_doc, c_doc) + if c_inclusive_ns_prefixes is not NULL: + python.lxml_free(c_inclusive_ns_prefixes) + + if writer is not None: + writer._exc_context._raise_if_stored() + + if error < 0: + message = "C14N failed" + if writer is not None: + errors = writer.error_log + if len(errors): + message = errors[0].message + raise C14NError(message) + + +# C14N 2.0 + +def canonicalize(xml_data=None, *, out=None, from_file=None, **options): + """Convert XML to its C14N 2.0 serialised form. + + If *out* is provided, it must be a file or file-like object that receives + the serialised canonical XML output (text, not bytes) through its ``.write()`` + method. To write to a file, open it in text mode with encoding "utf-8". + If *out* is not provided, this function returns the output as text string. + + Either *xml_data* (an XML string, tree or Element) or *file* + (a file path or file-like object) must be provided as input. + + The configuration options are the same as for the ``C14NWriterTarget``. + """ + if xml_data is None and from_file is None: + raise ValueError("Either 'xml_data' or 'from_file' must be provided as input") + + sio = None + if out is None: + sio = out = StringIO() + + target = C14NWriterTarget(out.write, **options) + + if xml_data is not None and not isinstance(xml_data, basestring): + _tree_to_target(xml_data, target) + return sio.getvalue() if sio is not None else None + + cdef _FeedParser parser = XMLParser( + target=target, + attribute_defaults=True, + collect_ids=False, + ) + + if xml_data is not None: + parser.feed(xml_data) + parser.close() + elif from_file is not None: + try: + _parseDocument(from_file, parser, base_url=None) + except _TargetParserResult: + pass + + return sio.getvalue() if sio is not None else None + + +cdef _tree_to_target(element, target): + for event, elem in iterwalk(element, events=('start', 'end', 'start-ns', 'comment', 'pi')): + text = None + if event == 'start': + target.start(elem.tag, elem.attrib) + text = elem.text + elif event == 'end': + target.end(elem.tag) + text = elem.tail + elif event == 'start-ns': + target.start_ns(*elem) + continue + elif event == 'comment': + target.comment(elem.text) + text = elem.tail + elif event == 'pi': + target.pi(elem.target, elem.text) + text = elem.tail + if text: + target.data(text) + return target.close() + + +cdef object _looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match + + +cdef class C14NWriterTarget: + """ + Canonicalization writer target for the XMLParser. + + Serialises parse events to XML C14N 2.0. + + Configuration options: + + - *with_comments*: set to true to include comments + - *strip_text*: set to true to strip whitespace before and after text content + - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}" + - *qname_aware_tags*: a set of qname aware tag names in which prefixes + should be replaced in text content + - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes + should be replaced in text content + - *exclude_attrs*: a set of attribute names that should not be serialised + - *exclude_tags*: a set of tag names that should not be serialised + """ + cdef object _write + cdef list _data + cdef set _qname_aware_tags + cdef object _find_qname_aware_attrs + cdef list _declared_ns_stack + cdef list _ns_stack + cdef dict _prefix_map + cdef list _preserve_space + cdef tuple _pending_start + cdef set _exclude_tags + cdef set _exclude_attrs + cdef Py_ssize_t _ignored_depth + cdef bint _with_comments + cdef bint _strip_text + cdef bint _rewrite_prefixes + cdef bint _root_seen + cdef bint _root_done + + def __init__(self, write, *, + with_comments=False, strip_text=False, rewrite_prefixes=False, + qname_aware_tags=None, qname_aware_attrs=None, + exclude_attrs=None, exclude_tags=None): + self._write = write + self._data = [] + self._with_comments = with_comments + self._strip_text = strip_text + self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None + self._exclude_tags = set(exclude_tags) if exclude_tags else None + + self._rewrite_prefixes = rewrite_prefixes + if qname_aware_tags: + self._qname_aware_tags = set(qname_aware_tags) + else: + self._qname_aware_tags = None + if qname_aware_attrs: + self._find_qname_aware_attrs = set(qname_aware_attrs).intersection + else: + self._find_qname_aware_attrs = None + + # Stack with globally and newly declared namespaces as (uri, prefix) pairs. + self._declared_ns_stack = [[ + ("http://www.w3.org/XML/1998/namespace", "xml"), + ]] + # Stack with user declared namespace prefixes as (uri, prefix) pairs. + self._ns_stack = [] + if not rewrite_prefixes: + self._ns_stack.append(_DEFAULT_NAMESPACE_PREFIXES_ITEMS) + self._ns_stack.append([]) + self._prefix_map = {} + self._preserve_space = [False] + self._pending_start = None + self._ignored_depth = 0 + self._root_seen = False + self._root_done = False + + def _iter_namespaces(self, ns_stack): + for namespaces in reversed(ns_stack): + if namespaces: # almost no element declares new namespaces + yield from namespaces + + cdef _resolve_prefix_name(self, prefixed_name): + prefix, name = prefixed_name.split(':', 1) + for uri, p in self._iter_namespaces(self._ns_stack): + if p == prefix: + return f'{{{uri}}}{name}' + raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope') + + cdef _qname(self, qname, uri=None): + if uri is None: + uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname) + else: + tag = qname + + prefixes_seen = set() + for u, prefix in self._iter_namespaces(self._declared_ns_stack): + if u == uri and prefix not in prefixes_seen: + return f'{prefix}:{tag}' if prefix else tag, tag, uri + prefixes_seen.add(prefix) + + # Not declared yet => add new declaration. + if self._rewrite_prefixes: + if uri in self._prefix_map: + prefix = self._prefix_map[uri] + else: + prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}' + self._declared_ns_stack[-1].append((uri, prefix)) + return f'{prefix}:{tag}', tag, uri + + if not uri and '' not in prefixes_seen: + # No default namespace declared => no prefix needed. + return tag, tag, uri + + for u, prefix in self._iter_namespaces(self._ns_stack): + if u == uri: + self._declared_ns_stack[-1].append((uri, prefix)) + return f'{prefix}:{tag}' if prefix else tag, tag, uri + + if not uri: + # As soon as a default namespace is defined, + # anything that has no namespace (and thus, no prefix) goes there. + return tag, tag, uri + + raise ValueError(f'Namespace "{uri}" of name "{tag}" is not declared in scope') + + def data(self, data): + if not self._ignored_depth: + self._data.append(data) + + cdef _flush(self): + cdef unicode data = ''.join(self._data) + del self._data[:] + if self._strip_text and not self._preserve_space[-1]: + data = data.strip() + if self._pending_start is not None: + (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None + qname_text = data if ':' in data and _looks_like_prefix_name(data) else None + self._start(tag, attrs, new_namespaces, qname_text) + if qname_text is not None: + return + if data and self._root_seen: + self._write(_escape_cdata_c14n(data)) + + def start_ns(self, prefix, uri): + if self._ignored_depth: + return + # we may have to resolve qnames in text content + if self._data: + self._flush() + self._ns_stack[-1].append((uri, prefix)) + + def start(self, tag, attrs): + if self._exclude_tags is not None and ( + self._ignored_depth or tag in self._exclude_tags): + self._ignored_depth += 1 + return + if self._data: + self._flush() + + new_namespaces = [] + self._declared_ns_stack.append(new_namespaces) + + if self._qname_aware_tags is not None and tag in self._qname_aware_tags: + # Need to parse text first to see if it requires a prefix declaration. + self._pending_start = (tag, attrs, new_namespaces) + return + self._start(tag, attrs, new_namespaces) + + cdef _start(self, tag, attrs, new_namespaces, qname_text=None): + if self._exclude_attrs is not None and attrs: + attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs} + + qnames = {tag, *attrs} + resolved_names = {} + + # Resolve prefixes in attribute and tag text. + if qname_text is not None: + qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text) + qnames.add(qname) + if self._find_qname_aware_attrs is not None and attrs: + qattrs = self._find_qname_aware_attrs(attrs) + if qattrs: + for attr_name in qattrs: + value = attrs[attr_name] + if _looks_like_prefix_name(value): + qname = resolved_names[value] = self._resolve_prefix_name(value) + qnames.add(qname) + else: + qattrs = None + else: + qattrs = None + + # Assign prefixes in lexicographical order of used URIs. + parsed_qnames = {n: self._qname(n) for n in sorted( + qnames, key=lambda n: n.split('}', 1))} + + # Write namespace declarations in prefix order ... + if new_namespaces: + attr_list = [ + ('xmlns:' + prefix if prefix else 'xmlns', uri) + for uri, prefix in new_namespaces + ] + attr_list.sort() + else: + # almost always empty + attr_list = [] + + # ... followed by attributes in URI+name order + if attrs: + for k, v in sorted(attrs.items()): + if qattrs is not None and k in qattrs and v in resolved_names: + v = parsed_qnames[resolved_names[v]][0] + attr_qname, attr_name, uri = parsed_qnames[k] + # No prefix for attributes in default ('') namespace. + attr_list.append((attr_qname if uri else attr_name, v)) + + # Honour xml:space attributes. + space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space') + self._preserve_space.append( + space_behaviour == 'preserve' if space_behaviour + else self._preserve_space[-1]) + + # Write the tag. + write = self._write + write('<' + parsed_qnames[tag][0]) + if attr_list: + write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list])) + write('>') + + # Write the resolved qname text content. + if qname_text is not None: + write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0])) + + self._root_seen = True + self._ns_stack.append([]) + + def end(self, tag): + if self._ignored_depth: + self._ignored_depth -= 1 + return + if self._data: + self._flush() + self._write(f'</{self._qname(tag)[0]}>') + self._preserve_space.pop() + self._root_done = len(self._preserve_space) == 1 + self._declared_ns_stack.pop() + self._ns_stack.pop() + + def comment(self, text): + if not self._with_comments: + return + if self._ignored_depth: + return + if self._root_done: + self._write('\n') + elif self._root_seen and self._data: + self._flush() + self._write(f'<!--{_escape_cdata_c14n(text)}-->') + if not self._root_seen: + self._write('\n') + + def pi(self, target, data): + if self._ignored_depth: + return + if self._root_done: + self._write('\n') + elif self._root_seen and self._data: + self._flush() + self._write( + f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>') + if not self._root_seen: + self._write('\n') + + def close(self): + return None + + +cdef _raise_serialization_error(text): + raise TypeError("cannot serialize %r (type %s)" % (text, type(text).__name__)) + + +cdef unicode _escape_cdata_c14n(stext): + # escape character data + cdef unicode text + cdef Py_UCS4 ch + cdef Py_ssize_t start = 0, pos = 0 + cdef list substrings = None + try: + text = unicode(stext) + except (TypeError, AttributeError): + return _raise_serialization_error(stext) + + for pos, ch in enumerate(text): + if ch == '&': + escape = '&' + elif ch == '<': + escape = '<' + elif ch == '>': + escape = '>' + elif ch == '\r': + escape = '
' + else: + continue + + if substrings is None: + substrings = [] + if pos > start: + substrings.append(text[start:pos]) + substrings.append(escape) + start = pos + 1 + + if substrings is None: + return text + if pos >= start: + substrings.append(text[start:pos+1]) + return ''.join(substrings) + + +cdef unicode _escape_attrib_c14n(stext): + # escape attribute value + cdef unicode text + cdef Py_UCS4 ch + cdef Py_ssize_t start = 0, pos = 0 + cdef list substrings = None + try: + text = unicode(stext) + except (TypeError, AttributeError): + return _raise_serialization_error(stext) + + for pos, ch in enumerate(text): + if ch == '&': + escape = '&' + elif ch == '<': + escape = '<' + elif ch == '"': + escape = '"' + elif ch == '\t': + escape = '	' + elif ch == '\n': + escape = '
' + elif ch == '\r': + escape = '
' + else: + continue + + if substrings is None: + substrings = [] + if pos > start: + substrings.append(text[start:pos]) + substrings.append(escape) + start = pos + 1 + + if substrings is None: + return text + if pos >= start: + substrings.append(text[start:pos+1]) + return ''.join(substrings) + + +# incremental serialisation + +cdef class xmlfile: + """xmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True) + + A simple mechanism for incremental XML serialisation. + + Usage example:: + + with xmlfile("somefile.xml", encoding='utf-8') as xf: + xf.write_declaration(standalone=True) + xf.write_doctype('<!DOCTYPE root SYSTEM "some.dtd">') + + # generate an element (the root element) + with xf.element('root'): + # write a complete Element into the open root element + xf.write(etree.Element('test')) + + # generate and write more Elements, e.g. through iterparse + for element in generate_some_elements(): + # serialise generated elements into the XML file + xf.write(element) + + # or write multiple Elements or strings at once + xf.write(etree.Element('start'), "text", etree.Element('end')) + + If 'output_file' is a file(-like) object, passing ``close=True`` will + close it when exiting the context manager. By default, it is left + to the owner to do that. When a file path is used, lxml will take care + of opening and closing the file itself. Also, when a compression level + is set, lxml will deliberately close the file to make sure all data gets + compressed and written. + + Setting ``buffered=False`` will flush the output after each operation, + such as opening or closing an ``xf.element()`` block or calling + ``xf.write()``. Alternatively, calling ``xf.flush()`` can be used to + explicitly flush any pending output when buffering is enabled. + """ + cdef object output_file + cdef bytes encoding + cdef _IncrementalFileWriter writer + cdef _AsyncIncrementalFileWriter async_writer + cdef int compresslevel + cdef bint close + cdef bint buffered + cdef int method + + def __init__(self, output_file not None, encoding=None, compression=None, + close=False, buffered=True): + self.output_file = output_file + self.encoding = _utf8orNone(encoding) + self.compresslevel = compression or 0 + self.close = close + self.buffered = buffered + self.method = OUTPUT_METHOD_XML + + def __enter__(self): + assert self.output_file is not None + self.writer = _IncrementalFileWriter( + self.output_file, self.encoding, self.compresslevel, + self.close, self.buffered, self.method) + return self.writer + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.writer is not None: + old_writer, self.writer = self.writer, None + raise_on_error = exc_type is None + old_writer._close(raise_on_error) + if self.close: + self.output_file = None + + async def __aenter__(self): + assert self.output_file is not None + if isinstance(self.output_file, basestring): + raise TypeError("Cannot asynchronously write to a plain file") + if not hasattr(self.output_file, 'write'): + raise TypeError("Output file needs an async .write() method") + self.async_writer = _AsyncIncrementalFileWriter( + self.output_file, self.encoding, self.compresslevel, + self.close, self.buffered, self.method) + return self.async_writer + + async def __aexit__(self, exc_type, exc_val, exc_tb): + if self.async_writer is not None: + old_writer, self.async_writer = self.async_writer, None + raise_on_error = exc_type is None + await old_writer._close(raise_on_error) + if self.close: + self.output_file = None + + +cdef class htmlfile(xmlfile): + """htmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True) + + A simple mechanism for incremental HTML serialisation. Works the same as + xmlfile. + """ + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.method = OUTPUT_METHOD_HTML + + +cdef enum _IncrementalFileWriterStatus: + WRITER_STARTING = 0 + WRITER_DECL_WRITTEN = 1 + WRITER_DTD_WRITTEN = 2 + WRITER_IN_ELEMENT = 3 + WRITER_FINISHED = 4 + + +@cython.final +@cython.internal +cdef class _IncrementalFileWriter: + cdef tree.xmlOutputBuffer* _c_out + cdef bytes _encoding + cdef const_char* _c_encoding + cdef _FilelikeWriter _target + cdef list _element_stack + cdef int _status + cdef int _method + cdef bint _buffered + + def __cinit__(self, outfile, bytes encoding, int compresslevel, bint close, + bint buffered, int method): + self._status = WRITER_STARTING + self._element_stack = [] + if encoding is None: + # We always need a document encoding to make the attribute serialisation + # of libxml2 identical to ours. + encoding = b'ASCII' + self._encoding = encoding + self._c_encoding = _cstr(encoding) + self._buffered = buffered + self._target = _create_output_buffer( + outfile, self._c_encoding, compresslevel, &self._c_out, close) + self._method = method + + def __dealloc__(self): + if self._c_out is not NULL: + tree.xmlOutputBufferClose(self._c_out) + + def write_declaration(self, version=None, standalone=None, doctype=None): + """write_declaration(self, version=None, standalone=None, doctype=None) + + Write an XML declaration and (optionally) a doctype into the file. + """ + assert self._c_out is not NULL + cdef const_xmlChar* c_version + cdef int c_standalone + if self._method != OUTPUT_METHOD_XML: + raise LxmlSyntaxError("only XML documents have declarations") + if self._status >= WRITER_DECL_WRITTEN: + raise LxmlSyntaxError("XML declaration already written") + version = _utf8orNone(version) + c_version = _xcstr(version) if version is not None else NULL + doctype = _utf8orNone(doctype) + if standalone is None: + c_standalone = -1 + else: + c_standalone = 1 if standalone else 0 + _writeDeclarationToBuffer(self._c_out, c_version, self._c_encoding, c_standalone) + if doctype is not None: + _writeDoctype(self._c_out, _xcstr(doctype)) + self._status = WRITER_DTD_WRITTEN + else: + self._status = WRITER_DECL_WRITTEN + if not self._buffered: + tree.xmlOutputBufferFlush(self._c_out) + self._handle_error(self._c_out.error) + + def write_doctype(self, doctype): + """write_doctype(self, doctype) + + Writes the given doctype declaration verbatimly into the file. + """ + assert self._c_out is not NULL + if doctype is None: + return + if self._status >= WRITER_DTD_WRITTEN: + raise LxmlSyntaxError("DOCTYPE already written or cannot write it here") + doctype = _utf8(doctype) + _writeDoctype(self._c_out, _xcstr(doctype)) + self._status = WRITER_DTD_WRITTEN + if not self._buffered: + tree.xmlOutputBufferFlush(self._c_out) + self._handle_error(self._c_out.error) + + def method(self, method): + """method(self, method) + + Returns a context manager that overrides and restores the output method. + method is one of (None, 'xml', 'html') where None means 'xml'. + """ + assert self._c_out is not NULL + c_method = self._method if method is None else _findOutputMethod(method) + return _MethodChanger(self, c_method) + + def element(self, tag, attrib=None, nsmap=None, method=None, **_extra): + """element(self, tag, attrib=None, nsmap=None, method, **_extra) + + Returns a context manager that writes an opening and closing tag. + method is one of (None, 'xml', 'html') where None means 'xml'. + """ + assert self._c_out is not NULL + attributes = [] + if attrib is not None: + for name, value in _iter_attrib(attrib): + if name not in _extra: + ns, name = _getNsTag(name) + attributes.append((ns, name, _utf8(value))) + if _extra: + for name, value in _extra.iteritems(): + ns, name = _getNsTag(name) + attributes.append((ns, name, _utf8(value))) + reversed_nsmap = {} + if nsmap: + for prefix, ns in nsmap.items(): + if prefix is not None: + prefix = _utf8(prefix) + _prefixValidOrRaise(prefix) + reversed_nsmap[_utf8(ns)] = prefix + ns, name = _getNsTag(tag) + + c_method = self._method if method is None else _findOutputMethod(method) + + return _FileWriterElement(self, (ns, name, attributes, reversed_nsmap), c_method) + + cdef _write_qname(self, bytes name, bytes prefix): + if prefix: # empty bytes for no prefix (not None to allow sorting) + tree.xmlOutputBufferWrite(self._c_out, len(prefix), _cstr(prefix)) + tree.xmlOutputBufferWrite(self._c_out, 1, ':') + tree.xmlOutputBufferWrite(self._c_out, len(name), _cstr(name)) + + cdef _write_start_element(self, element_config): + if self._status > WRITER_IN_ELEMENT: + raise LxmlSyntaxError("cannot append trailing element to complete XML document") + ns, name, attributes, nsmap = element_config + flat_namespace_map, new_namespaces = self._collect_namespaces(nsmap) + prefix = self._find_prefix(ns, flat_namespace_map, new_namespaces) + tree.xmlOutputBufferWrite(self._c_out, 1, '<') + self._write_qname(name, prefix) + + self._write_attributes_and_namespaces( + attributes, flat_namespace_map, new_namespaces) + + tree.xmlOutputBufferWrite(self._c_out, 1, '>') + if not self._buffered: + tree.xmlOutputBufferFlush(self._c_out) + self._handle_error(self._c_out.error) + + self._element_stack.append((ns, name, prefix, flat_namespace_map)) + self._status = WRITER_IN_ELEMENT + + cdef _write_attributes_and_namespaces(self, list attributes, + dict flat_namespace_map, + list new_namespaces): + if attributes: + # _find_prefix() may append to new_namespaces => build them first + attributes = [ + (self._find_prefix(ns, flat_namespace_map, new_namespaces), name, value) + for ns, name, value in attributes ] + if new_namespaces: + new_namespaces.sort() + self._write_attributes_list(new_namespaces) + if attributes: + self._write_attributes_list(attributes) + + cdef _write_attributes_list(self, list attributes): + for prefix, name, value in attributes: + tree.xmlOutputBufferWrite(self._c_out, 1, ' ') + self._write_qname(name, prefix) + tree.xmlOutputBufferWrite(self._c_out, 2, '="') + _write_attr_string(self._c_out, _cstr(value)) + + tree.xmlOutputBufferWrite(self._c_out, 1, '"') + + cdef _write_end_element(self, element_config): + if self._status != WRITER_IN_ELEMENT: + raise LxmlSyntaxError("not in an element") + if not self._element_stack or self._element_stack[-1][:2] != element_config[:2]: + raise LxmlSyntaxError("inconsistent exit action in context manager") + + # If previous write operations failed, the context manager exit might still call us. + # That is ok, but we stop writing closing tags and handling errors in that case. + # For all non-I/O errors, we continue writing closing tags if we can. + ok_to_write = self._c_out.error == xmlerror.XML_ERR_OK + + name, prefix = self._element_stack.pop()[1:3] + if ok_to_write: + tree.xmlOutputBufferWrite(self._c_out, 2, '</') + self._write_qname(name, prefix) + tree.xmlOutputBufferWrite(self._c_out, 1, '>') + + if not self._element_stack: + self._status = WRITER_FINISHED + if ok_to_write: + if not self._buffered: + tree.xmlOutputBufferFlush(self._c_out) + self._handle_error(self._c_out.error) + + cdef _find_prefix(self, bytes href, dict flat_namespaces_map, list new_namespaces): + if href is None: + return None + if href in flat_namespaces_map: + return flat_namespaces_map[href] + # need to create a new prefix + prefixes = flat_namespaces_map.values() + i = 0 + while True: + prefix = _utf8('ns%d' % i) + if prefix not in prefixes: + new_namespaces.append((b'xmlns', prefix, href)) + flat_namespaces_map[href] = prefix + return prefix + i += 1 + + cdef _collect_namespaces(self, dict nsmap): + new_namespaces = [] + flat_namespaces_map = {} + for ns, prefix in nsmap.iteritems(): + flat_namespaces_map[ns] = prefix + if prefix is None: + # use empty bytes rather than None to allow sorting + new_namespaces.append((b'', b'xmlns', ns)) + else: + new_namespaces.append((b'xmlns', prefix, ns)) + # merge in flat namespace map of parent + if self._element_stack: + for ns, prefix in (<dict>self._element_stack[-1][-1]).iteritems(): + if flat_namespaces_map.get(ns) is None: + # unknown or empty prefix => prefer a 'real' prefix + flat_namespaces_map[ns] = prefix + return flat_namespaces_map, new_namespaces + + def write(self, *args, bint with_tail=True, bint pretty_print=False, method=None): + """write(self, *args, with_tail=True, pretty_print=False, method=None) + + Write subtrees or strings into the file. + + If method is not None, it should be one of ('html', 'xml', 'text') + to temporarily override the output method. + """ + assert self._c_out is not NULL + c_method = self._method if method is None else _findOutputMethod(method) + + for content in args: + if _isString(content): + if self._status != WRITER_IN_ELEMENT: + if self._status > WRITER_IN_ELEMENT or content.strip(): + raise LxmlSyntaxError("not in an element") + bstring = _utf8(content) + if not bstring: + continue + + ns, name, _, _ = self._element_stack[-1] + if (c_method == OUTPUT_METHOD_HTML and + ns in (None, b'http://www.w3.org/1999/xhtml') and + name in (b'script', b'style')): + tree.xmlOutputBufferWrite(self._c_out, len(bstring), _cstr(bstring)) + + else: + tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(bstring), NULL) + + elif iselement(content): + if self._status > WRITER_IN_ELEMENT: + raise LxmlSyntaxError("cannot append trailing element to complete XML document") + _writeNodeToBuffer(self._c_out, (<_Element>content)._c_node, + self._c_encoding, NULL, c_method, + False, False, pretty_print, with_tail, False) + if (<_Element>content)._c_node.type == tree.XML_ELEMENT_NODE: + if not self._element_stack: + self._status = WRITER_FINISHED + + elif content is not None: + raise TypeError( + f"got invalid input value of type {type(content)}, expected string or Element") + self._handle_error(self._c_out.error) + if not self._buffered: + tree.xmlOutputBufferFlush(self._c_out) + self._handle_error(self._c_out.error) + + def flush(self): + """flush(self) + + Write any pending content of the current output buffer to the stream. + """ + assert self._c_out is not NULL + tree.xmlOutputBufferFlush(self._c_out) + self._handle_error(self._c_out.error) + + cdef _close(self, bint raise_on_error): + if raise_on_error: + if self._status < WRITER_IN_ELEMENT: + raise LxmlSyntaxError("no content written") + if self._element_stack: + raise LxmlSyntaxError("pending open tags on close") + error_result = self._c_out.error + if error_result == xmlerror.XML_ERR_OK: + error_result = tree.xmlOutputBufferClose(self._c_out) + if error_result != -1: + error_result = xmlerror.XML_ERR_OK + else: + tree.xmlOutputBufferClose(self._c_out) + self._status = WRITER_FINISHED + self._c_out = NULL + del self._element_stack[:] + if raise_on_error: + self._handle_error(error_result) + + cdef _handle_error(self, int error_result): + if error_result != xmlerror.XML_ERR_OK: + if self._target is not None: + self._target._exc_context._raise_if_stored() + _raiseSerialisationError(error_result) + + +@cython.final +@cython.internal +cdef class _AsyncDataWriter: + cdef list _data + def __cinit__(self): + self._data = [] + + cdef bytes collect(self): + data = b''.join(self._data) + del self._data[:] + return data + + def write(self, data): + self._data.append(data) + + def close(self): + pass + + +@cython.final +@cython.internal +cdef class _AsyncIncrementalFileWriter: + cdef _IncrementalFileWriter _writer + cdef _AsyncDataWriter _buffer + cdef object _async_outfile + cdef int _flush_after_writes + cdef bint _should_close + cdef bint _buffered + + def __cinit__(self, async_outfile, bytes encoding, int compresslevel, bint close, + bint buffered, int method): + self._flush_after_writes = 20 + self._async_outfile = async_outfile + self._should_close = close + self._buffered = buffered + self._buffer = _AsyncDataWriter() + self._writer = _IncrementalFileWriter( + self._buffer, encoding, compresslevel, close=True, buffered=False, method=method) + + cdef bytes _flush(self): + if not self._buffered or len(self._buffer._data) > self._flush_after_writes: + return self._buffer.collect() + return None + + async def flush(self): + self._writer.flush() + data = self._buffer.collect() + if data: + await self._async_outfile.write(data) + + async def write_declaration(self, version=None, standalone=None, doctype=None): + self._writer.write_declaration(version, standalone, doctype) + data = self._flush() + if data: + await self._async_outfile.write(data) + + async def write_doctype(self, doctype): + self._writer.write_doctype(doctype) + data = self._flush() + if data: + await self._async_outfile.write(data) + + async def write(self, *args, with_tail=True, pretty_print=False, method=None): + self._writer.write(*args, with_tail=with_tail, pretty_print=pretty_print, method=method) + data = self._flush() + if data: + await self._async_outfile.write(data) + + def method(self, method): + return self._writer.method(method) + + def element(self, tag, attrib=None, nsmap=None, method=None, **_extra): + element_writer = self._writer.element(tag, attrib, nsmap, method, **_extra) + return _AsyncFileWriterElement(element_writer, self) + + async def _close(self, bint raise_on_error): + self._writer._close(raise_on_error) + data = self._buffer.collect() + if data: + await self._async_outfile.write(data) + if self._should_close: + await self._async_outfile.close() + + +@cython.final +@cython.internal +cdef class _AsyncFileWriterElement: + cdef _FileWriterElement _element_writer + cdef _AsyncIncrementalFileWriter _writer + + def __cinit__(self, _FileWriterElement element_writer not None, + _AsyncIncrementalFileWriter writer not None): + self._element_writer = element_writer + self._writer = writer + + async def __aenter__(self): + self._element_writer.__enter__() + data = self._writer._flush() + if data: + await self._writer._async_outfile.write(data) + + async def __aexit__(self, *args): + self._element_writer.__exit__(*args) + data = self._writer._flush() + if data: + await self._writer._async_outfile.write(data) + + +@cython.final +@cython.internal +@cython.freelist(8) +cdef class _FileWriterElement: + cdef _IncrementalFileWriter _writer + cdef object _element + cdef int _new_method + cdef int _old_method + + def __cinit__(self, _IncrementalFileWriter writer not None, element_config, int method): + self._writer = writer + self._element = element_config + self._new_method = method + self._old_method = writer._method + + def __enter__(self): + self._writer._method = self._new_method + self._writer._write_start_element(self._element) + + def __exit__(self, exc_type, exc_val, exc_tb): + self._writer._write_end_element(self._element) + self._writer._method = self._old_method + + +@cython.final +@cython.internal +@cython.freelist(8) +cdef class _MethodChanger: + cdef _IncrementalFileWriter _writer + cdef int _new_method + cdef int _old_method + cdef bint _entered + cdef bint _exited + + def __cinit__(self, _IncrementalFileWriter writer not None, int method): + self._writer = writer + self._new_method = method + self._old_method = writer._method + self._entered = False + self._exited = False + + def __enter__(self): + if self._entered: + raise LxmlSyntaxError("Inconsistent enter action in context manager") + self._writer._method = self._new_method + self._entered = True + + def __exit__(self, exc_type, exc_val, exc_tb): + if self._exited: + raise LxmlSyntaxError("Inconsistent exit action in context manager") + if self._writer._method != self._new_method: + raise LxmlSyntaxError("Method changed outside of context manager") + self._writer._method = self._old_method + self._exited = True + + async def __aenter__(self): + # for your async convenience + return self.__enter__() + + async def __aexit__(self, *args): + # for your async convenience + return self.__exit__(*args) diff --git a/.venv/lib/python3.12/site-packages/lxml/usedoctest.py b/.venv/lib/python3.12/site-packages/lxml/usedoctest.py new file mode 100644 index 00000000..f1da8cad --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/usedoctest.py @@ -0,0 +1,13 @@ +"""Doctest module for XML comparison. + +Usage:: + + >>> import lxml.usedoctest + >>> # now do your XML doctests ... + +See `lxml.doctestcompare` +""" + +from lxml import doctestcompare + +doctestcompare.temp_install(del_module=__name__) diff --git a/.venv/lib/python3.12/site-packages/lxml/xinclude.pxi b/.venv/lib/python3.12/site-packages/lxml/xinclude.pxi new file mode 100644 index 00000000..5c9ac450 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/xinclude.pxi @@ -0,0 +1,67 @@ +# XInclude processing + +from lxml.includes cimport xinclude + + +cdef class XIncludeError(LxmlError): + """Error during XInclude processing. + """ + + +cdef class XInclude: + """XInclude(self) + XInclude processor. + + Create an instance and call it on an Element to run XInclude + processing. + """ + cdef _ErrorLog _error_log + def __init__(self): + self._error_log = _ErrorLog() + + @property + def error_log(self): + assert self._error_log is not None, "XInclude instance not initialised" + return self._error_log.copy() + + def __call__(self, _Element node not None): + "__call__(self, node)" + # We cannot pass the XML_PARSE_NOXINCNODE option as this would free + # the XInclude nodes - there may still be Python references to them! + # Therefore, we allow XInclude nodes to be converted to + # XML_XINCLUDE_START nodes. XML_XINCLUDE_END nodes are added as + # siblings. Tree traversal will simply ignore them as they are not + # typed as elements. The included fragment is added between the two, + # i.e. as a sibling, which does not conflict with traversal. + cdef int result + _assertValidNode(node) + assert self._error_log is not None, "XInclude processor not initialised" + if node._doc._parser is not None: + parse_options = node._doc._parser._parse_options + context = node._doc._parser._getParserContext() + c_context = <void*>context + else: + parse_options = 0 + context = None + c_context = NULL + + self._error_log.connect() + if tree.LIBXML_VERSION < 20704 or not c_context: + __GLOBAL_PARSER_CONTEXT.pushImpliedContext(context) + with nogil: + orig_loader = _register_document_loader() + if c_context: + result = xinclude.xmlXIncludeProcessTreeFlagsData( + node._c_node, parse_options, c_context) + else: + result = xinclude.xmlXIncludeProcessTree(node._c_node) + _reset_document_loader(orig_loader) + if tree.LIBXML_VERSION < 20704 or not c_context: + __GLOBAL_PARSER_CONTEXT.popImpliedContext() + self._error_log.disconnect() + + if result == -1: + raise XIncludeError( + self._error_log._buildExceptionMessage( + "XInclude processing failed"), + self._error_log) diff --git a/.venv/lib/python3.12/site-packages/lxml/xmlerror.pxi b/.venv/lib/python3.12/site-packages/lxml/xmlerror.pxi new file mode 100644 index 00000000..79442a8b --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/xmlerror.pxi @@ -0,0 +1,1654 @@ +# DEBUG and error logging + +from lxml.includes cimport xmlerror +from lxml cimport cvarargs + +DEF GLOBAL_ERROR_LOG = "_GlobalErrorLog" +DEF XSLT_ERROR_LOG = "_XSLTErrorLog" + +# module level API functions + +def clear_error_log(): + """clear_error_log() + + Clear the global error log. Note that this log is already bound to a + fixed size. + + Note: since lxml 2.2, the global error log is local to a thread + and this function will only clear the global error log of the + current thread. + """ + _getThreadErrorLog(GLOBAL_ERROR_LOG).clear() + + +# setup for global log: + +cdef void _initThreadLogging() noexcept: + # Disable generic error lines from libxml2. + _connectGenericErrorLog(None) + + # Divert XSLT error messages to the global XSLT error log instead of stderr. + xslt.xsltSetGenericErrorFunc(NULL, <xmlerror.xmlGenericErrorFunc>_receiveXSLTError) + + +# Logging classes + +@cython.final +@cython.freelist(16) +cdef class _LogEntry: + """A log message entry from an error log. + + Attributes: + + - message: the message text + - domain: the domain ID (see lxml.etree.ErrorDomains) + - type: the message type ID (see lxml.etree.ErrorTypes) + - level: the log level ID (see lxml.etree.ErrorLevels) + - line: the line at which the message originated (if applicable) + - column: the character column at which the message originated (if applicable) + - filename: the name of the file in which the message originated (if applicable) + - path: the location in which the error was found (if available) + """ + cdef readonly int domain + cdef readonly int type + cdef readonly int level + cdef readonly long line + cdef readonly int column + cdef basestring _message + cdef basestring _filename + cdef char* _c_message + cdef xmlChar* _c_filename + cdef xmlChar* _c_path + + def __dealloc__(self): + tree.xmlFree(self._c_message) + tree.xmlFree(self._c_filename) + tree.xmlFree(self._c_path) + + @cython.final + cdef int _setError(self, const xmlerror.xmlError* error) except -1: + self.domain = error.domain + self.type = error.code + self.level = <int>error.level + self.line = <long>error.line + self.column = error.int2 + self._c_message = NULL + self._c_filename = NULL + self._c_path = NULL + if (error.message is NULL or + error.message[0] == b'\0' or + error.message[0] == b'\n' and error.message[1] == b'\0'): + self._message = "unknown error" + else: + self._message = None + self._c_message = <char*> tree.xmlStrdup( + <const_xmlChar*> error.message) + if not self._c_message: + raise MemoryError() + if error.file is NULL: + self._filename = '<string>' + else: + self._filename = None + self._c_filename = tree.xmlStrdup(<const_xmlChar*> error.file) + if not self._c_filename: + raise MemoryError() + if error.node is not NULL: + self._c_path = tree.xmlGetNodePath(<xmlNode*> error.node) + c_line = tree.xmlGetLineNo(<xmlNode*> error.node) + if c_line > limits.INT_MAX: + self.line = c_line + + @cython.final + cdef _setGeneric(self, int domain, int type, int level, long line, + message, filename): + self.domain = domain + self.type = type + self.level = level + self.line = line + self.column = 0 + self._message = message + self._filename = filename + self._c_path = NULL + + def __repr__(self): + return "%s:%d:%d:%s:%s:%s: %s" % ( + self.filename, self.line, self.column, self.level_name, + self.domain_name, self.type_name, self.message) + + @property + def domain_name(self): + """The name of the error domain. See lxml.etree.ErrorDomains + """ + return ErrorDomains._getName(self.domain, "unknown") + + @property + def type_name(self): + """The name of the error type. See lxml.etree.ErrorTypes + """ + if self.domain == ErrorDomains.RELAXNGV: + getName = RelaxNGErrorTypes._getName + else: + getName = ErrorTypes._getName + return getName(self.type, "unknown") + + @property + def level_name(self): + """The name of the error level. See lxml.etree.ErrorLevels + """ + return ErrorLevels._getName(self.level, "unknown") + + @property + def message(self): + """The log message string. + """ + cdef size_t size + if self._message is not None: + return self._message + if self._c_message is NULL: + return None + size = cstring_h.strlen(self._c_message) + if size > 0 and self._c_message[size-1] == b'\n': + size -= 1 # strip EOL + # cannot use funicode() here because the message may contain + # byte encoded file paths etc. + try: + self._message = self._c_message[:size].decode('utf8') + except UnicodeDecodeError: + try: + self._message = self._c_message[:size].decode( + 'ascii', 'backslashreplace') + except UnicodeDecodeError: + self._message = '<undecodable error message>' + if self._c_message: + # clean up early + tree.xmlFree(self._c_message) + self._c_message = NULL + return self._message + + @property + def filename(self): + """The file path where the report originated, if any. + """ + if self._filename is None: + if self._c_filename is not NULL: + self._filename = _decodeFilename(self._c_filename) + # clean up early + tree.xmlFree(self._c_filename) + self._c_filename = NULL + return self._filename + + @property + def path(self): + """The XPath for the node where the error was detected. + """ + return funicode(self._c_path) if self._c_path is not NULL else None + + +cdef class _BaseErrorLog: + cdef _LogEntry _first_error + cdef readonly object last_error + def __init__(self, first_error, last_error): + self._first_error = first_error + self.last_error = last_error + + cpdef copy(self): + return _BaseErrorLog(self._first_error, self.last_error) + + def __repr__(self): + return '' + + cpdef receive(self, _LogEntry entry): + pass + + @cython.final + cdef int _receive(self, const xmlerror.xmlError* error) except -1: + cdef bint is_error + cdef _LogEntry entry + cdef _BaseErrorLog global_log + entry = _LogEntry.__new__(_LogEntry) + entry._setError(error) + is_error = error.level == xmlerror.XML_ERR_ERROR or \ + error.level == xmlerror.XML_ERR_FATAL + global_log = _getThreadErrorLog(GLOBAL_ERROR_LOG) + if global_log is not self: + global_log.receive(entry) + if is_error: + global_log.last_error = entry + self.receive(entry) + if is_error: + self.last_error = entry + + @cython.final + cdef int _receiveGeneric(self, int domain, int type, int level, long line, + message, filename) except -1: + cdef bint is_error + cdef _LogEntry entry + cdef _BaseErrorLog global_log + entry = _LogEntry.__new__(_LogEntry) + entry._setGeneric(domain, type, level, line, message, filename) + is_error = level == xmlerror.XML_ERR_ERROR or \ + level == xmlerror.XML_ERR_FATAL + global_log = _getThreadErrorLog(GLOBAL_ERROR_LOG) + if global_log is not self: + global_log.receive(entry) + if is_error: + global_log.last_error = entry + self.receive(entry) + if is_error: + self.last_error = entry + + @cython.final + cdef _buildParseException(self, exctype, default_message): + code = xmlerror.XML_ERR_INTERNAL_ERROR + if self._first_error is None: + return exctype(default_message, code, 0, 0) + message = self._first_error.message + if message: + code = self._first_error.type + else: + message = default_message + line = self._first_error.line + column = self._first_error.column + filename = self._first_error.filename + if line > 0: + if column > 0: + message = f"{message}, line {line}, column {column}" + else: + message = f"{message}, line {line}" + return exctype(message, code, line, column, filename) + + @cython.final + cdef _buildExceptionMessage(self, default_message): + if self._first_error is None: + return default_message + if self._first_error.message: + message = self._first_error.message + elif default_message is None: + return None + else: + message = default_message + if self._first_error.line > 0: + if self._first_error.column > 0: + message = f"{message}, line {self._first_error.line}, column {self._first_error.column}" + else: + message = f"{message}, line {self._first_error.line}" + return message + +cdef class _ListErrorLog(_BaseErrorLog): + "Immutable base version of a list based error log." + cdef list _entries + cdef int _offset + def __init__(self, entries, first_error, last_error): + if entries: + if first_error is None: + first_error = entries[0] + if last_error is None: + last_error = entries[-1] + _BaseErrorLog.__init__(self, first_error, last_error) + self._entries = entries + + cpdef copy(self): + """Creates a shallow copy of this error log. Reuses the list of + entries. + """ + cdef _ListErrorLog log = _ListErrorLog( + self._entries, self._first_error, self.last_error) + log._offset = self._offset + return log + + def __iter__(self): + entries = self._entries + if self._offset: + entries = islice(entries, self._offset) + return iter(entries) + + def __repr__(self): + return '\n'.join([repr(entry) for entry in self]) + + def __getitem__(self, index): + if self._offset: + index += self._offset + return self._entries[index] + + def __len__(self): + return len(self._entries) - self._offset + + def __contains__(self, error_type): + cdef Py_ssize_t i + for i, entry in enumerate(self._entries): + if i < self._offset: + continue + if entry.type == error_type: + return True + return False + + def __bool__(self): + return len(self._entries) > self._offset + + def filter_domains(self, domains): + """Filter the errors by the given domains and return a new error log + containing the matches. + """ + cdef _LogEntry entry + if isinstance(domains, int): + domains = (domains,) + filtered = [entry for entry in self if entry.domain in domains] + return _ListErrorLog(filtered, None, None) + + def filter_types(self, types): + """filter_types(self, types) + + Filter the errors by the given types and return a new error + log containing the matches. + """ + cdef _LogEntry entry + if isinstance(types, int): + types = (types,) + filtered = [entry for entry in self if entry.type in types] + return _ListErrorLog(filtered, None, None) + + def filter_levels(self, levels): + """filter_levels(self, levels) + + Filter the errors by the given error levels and return a new + error log containing the matches. + """ + cdef _LogEntry entry + if isinstance(levels, int): + levels = (levels,) + filtered = [entry for entry in self if entry.level in levels] + return _ListErrorLog(filtered, None, None) + + def filter_from_level(self, level): + """filter_from_level(self, level) + + Return a log with all messages of the requested level of worse. + """ + cdef _LogEntry entry + filtered = [entry for entry in self if entry.level >= level] + return _ListErrorLog(filtered, None, None) + + def filter_from_fatals(self): + """filter_from_fatals(self) + + Convenience method to get all fatal error messages. + """ + return self.filter_from_level(ErrorLevels.FATAL) + + def filter_from_errors(self): + """filter_from_errors(self) + + Convenience method to get all error messages or worse. + """ + return self.filter_from_level(ErrorLevels.ERROR) + + def filter_from_warnings(self): + """filter_from_warnings(self) + + Convenience method to get all warnings or worse. + """ + return self.filter_from_level(ErrorLevels.WARNING) + + +@cython.final +@cython.internal +cdef class _ErrorLogContext: + """ + Error log context for the 'with' statement. + Stores a reference to the current callbacks to allow for + recursively stacked log contexts. + """ + cdef xmlerror.xmlStructuredErrorFunc old_error_func + cdef void* old_error_context + cdef xmlerror.xmlGenericErrorFunc old_xslt_error_func + cdef void* old_xslt_error_context + cdef _BaseErrorLog old_xslt_error_log + + cdef int push_error_log(self, _BaseErrorLog log) except -1: + self.old_error_func = xmlerror.xmlStructuredError + self.old_error_context = xmlerror.xmlStructuredErrorContext + xmlerror.xmlSetStructuredErrorFunc( + <void*>log, <xmlerror.xmlStructuredErrorFunc>_receiveError) + + # xslt.xsltSetGenericErrorFunc() is not thread-local => keep error log in TLS + self.old_xslt_error_func = xslt.xsltGenericError + self.old_xslt_error_context = xslt.xsltGenericErrorContext + self.old_xslt_error_log = _getThreadErrorLog(XSLT_ERROR_LOG) + _setThreadErrorLog(XSLT_ERROR_LOG, log) + xslt.xsltSetGenericErrorFunc( + NULL, <xmlerror.xmlGenericErrorFunc>_receiveXSLTError) + return 0 + + cdef int pop_error_log(self) except -1: + xmlerror.xmlSetStructuredErrorFunc( + self.old_error_context, self.old_error_func) + xslt.xsltSetGenericErrorFunc( + self.old_xslt_error_context, self.old_xslt_error_func) + _setThreadErrorLog(XSLT_ERROR_LOG, self.old_xslt_error_log) + self.old_xslt_error_log= None + return 0 + + +cdef class _ErrorLog(_ListErrorLog): + cdef list _logContexts + def __cinit__(self): + self._logContexts = [] + + def __init__(self): + _ListErrorLog.__init__(self, [], None, None) + + @cython.final + cdef int __enter__(self) except -1: + return self.connect() + + def __exit__(self, *args): + # TODO: make this a cdef function when Cython supports it + self.disconnect() + + @cython.final + cdef int connect(self) except -1: + self._first_error = None + del self._entries[:] + + cdef _ErrorLogContext context = _ErrorLogContext.__new__(_ErrorLogContext) + context.push_error_log(self) + self._logContexts.append(context) + return 0 + + @cython.final + cdef int disconnect(self) except -1: + cdef _ErrorLogContext context = self._logContexts.pop() + context.pop_error_log() + return 0 + + cpdef clear(self): + self._first_error = None + self.last_error = None + self._offset = 0 + del self._entries[:] + + cpdef copy(self): + """Creates a shallow copy of this error log and the list of entries. + """ + return _ListErrorLog( + self._entries[self._offset:], + self._first_error, self.last_error) + + def __iter__(self): + return iter(self._entries[self._offset:]) + + cpdef receive(self, _LogEntry entry): + if self._first_error is None and entry.level >= xmlerror.XML_ERR_ERROR: + self._first_error = entry + self._entries.append(entry) + +cdef class _DomainErrorLog(_ErrorLog): + def __init__(self, domains): + _ErrorLog.__init__(self) + self._accepted_domains = tuple(domains) + + cpdef receive(self, _LogEntry entry): + if entry.domain in self._accepted_domains: + _ErrorLog.receive(self, entry) + +cdef class _RotatingErrorLog(_ErrorLog): + cdef int _max_len + def __init__(self, max_len): + _ErrorLog.__init__(self) + self._max_len = max_len + + cpdef receive(self, _LogEntry entry): + if self._first_error is None and entry.level >= xmlerror.XML_ERR_ERROR: + self._first_error = entry + self._entries.append(entry) + + if len(self._entries) > self._max_len: + self._offset += 1 + if self._offset > self._max_len // 3: + offset = self._offset + self._offset = 0 + del self._entries[:offset] + +cdef class PyErrorLog(_BaseErrorLog): + """PyErrorLog(self, logger_name=None, logger=None) + A global error log that connects to the Python stdlib logging package. + + The constructor accepts an optional logger name or a readily + instantiated logger instance. + + If you want to change the mapping between libxml2's ErrorLevels and Python + logging levels, you can modify the level_map dictionary from a subclass. + + The default mapping is:: + + ErrorLevels.WARNING = logging.WARNING + ErrorLevels.ERROR = logging.ERROR + ErrorLevels.FATAL = logging.CRITICAL + + You can also override the method ``receive()`` that takes a LogEntry + object and calls ``self.log(log_entry, format_string, arg1, arg2, ...)`` + with appropriate data. + """ + cdef readonly dict level_map + cdef object _map_level + cdef object _log + def __init__(self, logger_name=None, logger=None): + _BaseErrorLog.__init__(self, None, None) + import logging + self.level_map = { + ErrorLevels.WARNING : logging.WARNING, + ErrorLevels.ERROR : logging.ERROR, + ErrorLevels.FATAL : logging.CRITICAL + } + self._map_level = self.level_map.get + if logger is None: + if logger_name: + logger = logging.getLogger(logger_name) + else: + logger = logging.getLogger() + self._log = logger.log + + cpdef copy(self): + """Dummy method that returns an empty error log. + """ + return _ListErrorLog([], None, None) + + def log(self, log_entry, message, *args): + """log(self, log_entry, message, *args) + + Called by the .receive() method to log a _LogEntry instance to + the Python logging system. This handles the error level + mapping. + + In the default implementation, the ``message`` argument + receives a complete log line, and there are no further + ``args``. To change the message format, it is best to + override the .receive() method instead of this one. + """ + self._log( + self._map_level(log_entry.level, 0), + message, *args + ) + + cpdef receive(self, _LogEntry log_entry): + """receive(self, log_entry) + + Receive a _LogEntry instance from the logging system. Calls + the .log() method with appropriate parameters:: + + self.log(log_entry, repr(log_entry)) + + You can override this method to provide your own log output + format. + """ + self.log(log_entry, repr(log_entry)) + +# thread-local, global list log to collect error output messages from +# libxml2/libxslt + +cdef _BaseErrorLog __GLOBAL_ERROR_LOG = _RotatingErrorLog(__MAX_LOG_SIZE) + + +cdef _BaseErrorLog _getThreadErrorLog(name): + """Retrieve the current error log with name 'name' of this thread.""" + cdef python.PyObject* thread_dict + thread_dict = python.PyThreadState_GetDict() + if thread_dict is NULL: + return __GLOBAL_ERROR_LOG + try: + return (<object>thread_dict)[name] + except KeyError: + log = (<object>thread_dict)[name] = \ + _RotatingErrorLog(__MAX_LOG_SIZE) + return log + + +cdef _setThreadErrorLog(name, _BaseErrorLog log): + """Set the global error log of this thread.""" + cdef python.PyObject* thread_dict + thread_dict = python.PyThreadState_GetDict() + if thread_dict is NULL: + if name == GLOBAL_ERROR_LOG: + global __GLOBAL_ERROR_LOG + __GLOBAL_ERROR_LOG = log + else: + (<object>thread_dict)[name] = log + + +cdef __copyGlobalErrorLog(): + "Helper function for properties in exceptions." + return _getThreadErrorLog(GLOBAL_ERROR_LOG).copy() + + +def use_global_python_log(PyErrorLog log not None): + """use_global_python_log(log) + + Replace the global error log by an etree.PyErrorLog that uses the + standard Python logging package. + + Note that this disables access to the global error log from exceptions. + Parsers, XSLT etc. will continue to provide their normal local error log. + + Note: prior to lxml 2.2, this changed the error log globally. + Since lxml 2.2, the global error log is local to a thread and this + function will only set the global error log of the current thread. + """ + _setThreadErrorLog(GLOBAL_ERROR_LOG, log) + + +# local log functions: forward error to logger object +cdef void _forwardError(void* c_log_handler, const xmlerror.xmlError* error) noexcept with gil: + cdef _BaseErrorLog log_handler + if c_log_handler is not NULL: + log_handler = <_BaseErrorLog>c_log_handler + elif error.domain == xmlerror.XML_FROM_XSLT: + log_handler = _getThreadErrorLog(XSLT_ERROR_LOG) + else: + log_handler = _getThreadErrorLog(GLOBAL_ERROR_LOG) + log_handler._receive(error) + + +cdef void _receiveError(void* c_log_handler, const xmlerror.xmlError* error) noexcept nogil: + # no Python objects here, may be called without thread context ! + if __DEBUG: + _forwardError(c_log_handler, error) + + +cdef void _receiveXSLTError(void* c_log_handler, char* msg, ...) noexcept nogil: + # no Python objects here, may be called without thread context ! + cdef cvarargs.va_list args + cvarargs.va_start(args, msg) + _receiveGenericError(c_log_handler, xmlerror.XML_FROM_XSLT, msg, args) + cvarargs.va_end(args) + +cdef void _receiveRelaxNGParseError(void* c_log_handler, char* msg, ...) noexcept nogil: + # no Python objects here, may be called without thread context ! + cdef cvarargs.va_list args + cvarargs.va_start(args, msg) + _receiveGenericError(c_log_handler, xmlerror.XML_FROM_RELAXNGP, msg, args) + cvarargs.va_end(args) + +cdef void _receiveRelaxNGValidationError(void* c_log_handler, char* msg, ...) noexcept nogil: + # no Python objects here, may be called without thread context ! + cdef cvarargs.va_list args + cvarargs.va_start(args, msg) + _receiveGenericError(c_log_handler, xmlerror.XML_FROM_RELAXNGV, msg, args) + cvarargs.va_end(args) + +# dummy function: no log output at all +cdef void _nullGenericErrorFunc(void* ctxt, char* msg, ...) noexcept nogil: + pass + + +cdef void _connectGenericErrorLog(log, int c_domain=-1) noexcept: + cdef xmlerror.xmlGenericErrorFunc error_func = NULL + c_log = <void*>log + if c_domain == xmlerror.XML_FROM_XSLT: + error_func = <xmlerror.xmlGenericErrorFunc>_receiveXSLTError + elif c_domain == xmlerror.XML_FROM_RELAXNGP: + error_func = <xmlerror.xmlGenericErrorFunc>_receiveRelaxNGParseError + elif c_domain == xmlerror.XML_FROM_RELAXNGV: + error_func = <xmlerror.xmlGenericErrorFunc>_receiveRelaxNGValidationError + + if log is None or error_func is NULL: + c_log = NULL + error_func = <xmlerror.xmlGenericErrorFunc>_nullGenericErrorFunc + xmlerror.xmlSetGenericErrorFunc(c_log, error_func) + + +cdef void _receiveGenericError(void* c_log_handler, int c_domain, + char* msg, cvarargs.va_list args) noexcept nogil: + # no Python objects here, may be called without thread context ! + cdef xmlerror.xmlError c_error + cdef char* c_text + cdef char* c_message + cdef char* c_element + cdef char* c_pos + cdef char* c_name_pos + cdef char* c_str + cdef int text_size, element_size, format_count, c_int + if not __DEBUG or msg is NULL: + return + if msg[0] in b'\n\0': + return + + c_text = c_element = c_error.file = c_error.node = NULL + c_error.line = 0 + + # parse "NAME %s" chunks from the format string + c_name_pos = c_pos = msg + format_count = 0 + while c_pos[0]: + if c_pos[0] == b'%': + c_pos += 1 + if c_pos[0] == b's': # "%s" + format_count += 1 + c_str = cvarargs.va_charptr(args) + if c_pos == msg + 1: + c_text = c_str # msg == "%s..." + elif c_name_pos[0] == b'e': + if cstring_h.strncmp(c_name_pos, 'element %s', 10) == 0: + c_element = c_str + elif c_name_pos[0] == b'f': + if cstring_h.strncmp(c_name_pos, 'file %s', 7) == 0: + if cstring_h.strncmp('string://__STRING__XSLT', + c_str, 23) == 0: + c_str = '<xslt>' + c_error.file = c_str + elif c_pos[0] == b'd': # "%d" + format_count += 1 + c_int = cvarargs.va_int(args) + if cstring_h.strncmp(c_name_pos, 'line %d', 7) == 0: + c_error.line = c_int + elif c_pos[0] != b'%': # "%%" == "%" + format_count += 1 + break # unexpected format or end of string => abort + elif c_pos[0] == b' ': + if c_pos[1] != b'%': + c_name_pos = c_pos + 1 + c_pos += 1 + + c_message = NULL + if c_text is NULL: + if c_element is not NULL and format_count == 1: + # special case: a single occurrence of 'element %s' + text_size = cstring_h.strlen(msg) + element_size = cstring_h.strlen(c_element) + c_message = <char*>stdlib.malloc( + (text_size + element_size + 1) * sizeof(char)) + stdio.sprintf(c_message, msg, c_element) + c_error.message = c_message + else: + c_error.message = '' + elif c_element is NULL: + c_error.message = c_text + else: + text_size = cstring_h.strlen(c_text) + element_size = cstring_h.strlen(c_element) + c_message = <char*>stdlib.malloc( + (text_size + 12 + element_size + 1) * sizeof(char)) + if c_message is NULL: + c_error.message = c_text + else: + stdio.sprintf(c_message, "%s, element '%s'", c_text, c_element) + c_error.message = c_message + + c_error.domain = c_domain + c_error.code = xmlerror.XML_ERR_OK # what else? + c_error.level = xmlerror.XML_ERR_ERROR # what else? + c_error.int2 = 0 + + _forwardError(c_log_handler, &c_error) + + if c_message is not NULL: + stdlib.free(c_message) + +################################################################################ +## CONSTANTS FROM "xmlerror.h" (or rather libxml-xmlerror.html) +################################################################################ + +cdef __initErrorConstants(): + "Called at setup time to parse the constants and build the classes below." + global __ERROR_LEVELS, __ERROR_DOMAINS, __PARSER_ERROR_TYPES, __RELAXNG_ERROR_TYPES + const_defs = ((ErrorLevels, __ERROR_LEVELS), + (ErrorDomains, __ERROR_DOMAINS), + (ErrorTypes, __PARSER_ERROR_TYPES), + (RelaxNGErrorTypes, __RELAXNG_ERROR_TYPES)) + + for cls, constants in const_defs: + reverse_dict = {} + cls._names = reverse_dict + cls._getName = reverse_dict.get + for line in constants.splitlines(): + if not line: + continue + name, value = line.split('=') + value = int(value) + setattr(cls, name, value) + reverse_dict[value] = name + + # discard the global string references after use + __ERROR_LEVELS = __ERROR_DOMAINS = __PARSER_ERROR_TYPES = __RELAXNG_ERROR_TYPES = None + + +class ErrorLevels(object): + """Libxml2 error levels""" + +class ErrorDomains(object): + """Libxml2 error domains""" + +class ErrorTypes(object): + """Libxml2 error types""" + +class RelaxNGErrorTypes(object): + """Libxml2 RelaxNG error types""" + + +# --- BEGIN: GENERATED CONSTANTS --- + +# This section is generated by the script 'update-error-constants.py'. + +cdef object __ERROR_LEVELS = """\ +NONE=0 +WARNING=1 +ERROR=2 +FATAL=3 +""" + +cdef object __ERROR_DOMAINS = """\ +NONE=0 +PARSER=1 +TREE=2 +NAMESPACE=3 +DTD=4 +HTML=5 +MEMORY=6 +OUTPUT=7 +IO=8 +FTP=9 +HTTP=10 +XINCLUDE=11 +XPATH=12 +XPOINTER=13 +REGEXP=14 +DATATYPE=15 +SCHEMASP=16 +SCHEMASV=17 +RELAXNGP=18 +RELAXNGV=19 +CATALOG=20 +C14N=21 +XSLT=22 +VALID=23 +CHECK=24 +WRITER=25 +MODULE=26 +I18N=27 +SCHEMATRONV=28 +BUFFER=29 +URI=30 +""" + +cdef object __PARSER_ERROR_TYPES = """\ +ERR_OK=0 +ERR_INTERNAL_ERROR=1 +ERR_NO_MEMORY=2 +ERR_DOCUMENT_START=3 +ERR_DOCUMENT_EMPTY=4 +ERR_DOCUMENT_END=5 +ERR_INVALID_HEX_CHARREF=6 +ERR_INVALID_DEC_CHARREF=7 +ERR_INVALID_CHARREF=8 +ERR_INVALID_CHAR=9 +ERR_CHARREF_AT_EOF=10 +ERR_CHARREF_IN_PROLOG=11 +ERR_CHARREF_IN_EPILOG=12 +ERR_CHARREF_IN_DTD=13 +ERR_ENTITYREF_AT_EOF=14 +ERR_ENTITYREF_IN_PROLOG=15 +ERR_ENTITYREF_IN_EPILOG=16 +ERR_ENTITYREF_IN_DTD=17 +ERR_PEREF_AT_EOF=18 +ERR_PEREF_IN_PROLOG=19 +ERR_PEREF_IN_EPILOG=20 +ERR_PEREF_IN_INT_SUBSET=21 +ERR_ENTITYREF_NO_NAME=22 +ERR_ENTITYREF_SEMICOL_MISSING=23 +ERR_PEREF_NO_NAME=24 +ERR_PEREF_SEMICOL_MISSING=25 +ERR_UNDECLARED_ENTITY=26 +WAR_UNDECLARED_ENTITY=27 +ERR_UNPARSED_ENTITY=28 +ERR_ENTITY_IS_EXTERNAL=29 +ERR_ENTITY_IS_PARAMETER=30 +ERR_UNKNOWN_ENCODING=31 +ERR_UNSUPPORTED_ENCODING=32 +ERR_STRING_NOT_STARTED=33 +ERR_STRING_NOT_CLOSED=34 +ERR_NS_DECL_ERROR=35 +ERR_ENTITY_NOT_STARTED=36 +ERR_ENTITY_NOT_FINISHED=37 +ERR_LT_IN_ATTRIBUTE=38 +ERR_ATTRIBUTE_NOT_STARTED=39 +ERR_ATTRIBUTE_NOT_FINISHED=40 +ERR_ATTRIBUTE_WITHOUT_VALUE=41 +ERR_ATTRIBUTE_REDEFINED=42 +ERR_LITERAL_NOT_STARTED=43 +ERR_LITERAL_NOT_FINISHED=44 +ERR_COMMENT_NOT_FINISHED=45 +ERR_PI_NOT_STARTED=46 +ERR_PI_NOT_FINISHED=47 +ERR_NOTATION_NOT_STARTED=48 +ERR_NOTATION_NOT_FINISHED=49 +ERR_ATTLIST_NOT_STARTED=50 +ERR_ATTLIST_NOT_FINISHED=51 +ERR_MIXED_NOT_STARTED=52 +ERR_MIXED_NOT_FINISHED=53 +ERR_ELEMCONTENT_NOT_STARTED=54 +ERR_ELEMCONTENT_NOT_FINISHED=55 +ERR_XMLDECL_NOT_STARTED=56 +ERR_XMLDECL_NOT_FINISHED=57 +ERR_CONDSEC_NOT_STARTED=58 +ERR_CONDSEC_NOT_FINISHED=59 +ERR_EXT_SUBSET_NOT_FINISHED=60 +ERR_DOCTYPE_NOT_FINISHED=61 +ERR_MISPLACED_CDATA_END=62 +ERR_CDATA_NOT_FINISHED=63 +ERR_RESERVED_XML_NAME=64 +ERR_SPACE_REQUIRED=65 +ERR_SEPARATOR_REQUIRED=66 +ERR_NMTOKEN_REQUIRED=67 +ERR_NAME_REQUIRED=68 +ERR_PCDATA_REQUIRED=69 +ERR_URI_REQUIRED=70 +ERR_PUBID_REQUIRED=71 +ERR_LT_REQUIRED=72 +ERR_GT_REQUIRED=73 +ERR_LTSLASH_REQUIRED=74 +ERR_EQUAL_REQUIRED=75 +ERR_TAG_NAME_MISMATCH=76 +ERR_TAG_NOT_FINISHED=77 +ERR_STANDALONE_VALUE=78 +ERR_ENCODING_NAME=79 +ERR_HYPHEN_IN_COMMENT=80 +ERR_INVALID_ENCODING=81 +ERR_EXT_ENTITY_STANDALONE=82 +ERR_CONDSEC_INVALID=83 +ERR_VALUE_REQUIRED=84 +ERR_NOT_WELL_BALANCED=85 +ERR_EXTRA_CONTENT=86 +ERR_ENTITY_CHAR_ERROR=87 +ERR_ENTITY_PE_INTERNAL=88 +ERR_ENTITY_LOOP=89 +ERR_ENTITY_BOUNDARY=90 +ERR_INVALID_URI=91 +ERR_URI_FRAGMENT=92 +WAR_CATALOG_PI=93 +ERR_NO_DTD=94 +ERR_CONDSEC_INVALID_KEYWORD=95 +ERR_VERSION_MISSING=96 +WAR_UNKNOWN_VERSION=97 +WAR_LANG_VALUE=98 +WAR_NS_URI=99 +WAR_NS_URI_RELATIVE=100 +ERR_MISSING_ENCODING=101 +WAR_SPACE_VALUE=102 +ERR_NOT_STANDALONE=103 +ERR_ENTITY_PROCESSING=104 +ERR_NOTATION_PROCESSING=105 +WAR_NS_COLUMN=106 +WAR_ENTITY_REDEFINED=107 +ERR_UNKNOWN_VERSION=108 +ERR_VERSION_MISMATCH=109 +ERR_NAME_TOO_LONG=110 +ERR_USER_STOP=111 +ERR_COMMENT_ABRUPTLY_ENDED=112 +NS_ERR_XML_NAMESPACE=200 +NS_ERR_UNDEFINED_NAMESPACE=201 +NS_ERR_QNAME=202 +NS_ERR_ATTRIBUTE_REDEFINED=203 +NS_ERR_EMPTY=204 +NS_ERR_COLON=205 +DTD_ATTRIBUTE_DEFAULT=500 +DTD_ATTRIBUTE_REDEFINED=501 +DTD_ATTRIBUTE_VALUE=502 +DTD_CONTENT_ERROR=503 +DTD_CONTENT_MODEL=504 +DTD_CONTENT_NOT_DETERMINIST=505 +DTD_DIFFERENT_PREFIX=506 +DTD_ELEM_DEFAULT_NAMESPACE=507 +DTD_ELEM_NAMESPACE=508 +DTD_ELEM_REDEFINED=509 +DTD_EMPTY_NOTATION=510 +DTD_ENTITY_TYPE=511 +DTD_ID_FIXED=512 +DTD_ID_REDEFINED=513 +DTD_ID_SUBSET=514 +DTD_INVALID_CHILD=515 +DTD_INVALID_DEFAULT=516 +DTD_LOAD_ERROR=517 +DTD_MISSING_ATTRIBUTE=518 +DTD_MIXED_CORRUPT=519 +DTD_MULTIPLE_ID=520 +DTD_NO_DOC=521 +DTD_NO_DTD=522 +DTD_NO_ELEM_NAME=523 +DTD_NO_PREFIX=524 +DTD_NO_ROOT=525 +DTD_NOTATION_REDEFINED=526 +DTD_NOTATION_VALUE=527 +DTD_NOT_EMPTY=528 +DTD_NOT_PCDATA=529 +DTD_NOT_STANDALONE=530 +DTD_ROOT_NAME=531 +DTD_STANDALONE_WHITE_SPACE=532 +DTD_UNKNOWN_ATTRIBUTE=533 +DTD_UNKNOWN_ELEM=534 +DTD_UNKNOWN_ENTITY=535 +DTD_UNKNOWN_ID=536 +DTD_UNKNOWN_NOTATION=537 +DTD_STANDALONE_DEFAULTED=538 +DTD_XMLID_VALUE=539 +DTD_XMLID_TYPE=540 +DTD_DUP_TOKEN=541 +HTML_STRUCURE_ERROR=800 +HTML_UNKNOWN_TAG=801 +RNGP_ANYNAME_ATTR_ANCESTOR=1000 +RNGP_ATTR_CONFLICT=1001 +RNGP_ATTRIBUTE_CHILDREN=1002 +RNGP_ATTRIBUTE_CONTENT=1003 +RNGP_ATTRIBUTE_EMPTY=1004 +RNGP_ATTRIBUTE_NOOP=1005 +RNGP_CHOICE_CONTENT=1006 +RNGP_CHOICE_EMPTY=1007 +RNGP_CREATE_FAILURE=1008 +RNGP_DATA_CONTENT=1009 +RNGP_DEF_CHOICE_AND_INTERLEAVE=1010 +RNGP_DEFINE_CREATE_FAILED=1011 +RNGP_DEFINE_EMPTY=1012 +RNGP_DEFINE_MISSING=1013 +RNGP_DEFINE_NAME_MISSING=1014 +RNGP_ELEM_CONTENT_EMPTY=1015 +RNGP_ELEM_CONTENT_ERROR=1016 +RNGP_ELEMENT_EMPTY=1017 +RNGP_ELEMENT_CONTENT=1018 +RNGP_ELEMENT_NAME=1019 +RNGP_ELEMENT_NO_CONTENT=1020 +RNGP_ELEM_TEXT_CONFLICT=1021 +RNGP_EMPTY=1022 +RNGP_EMPTY_CONSTRUCT=1023 +RNGP_EMPTY_CONTENT=1024 +RNGP_EMPTY_NOT_EMPTY=1025 +RNGP_ERROR_TYPE_LIB=1026 +RNGP_EXCEPT_EMPTY=1027 +RNGP_EXCEPT_MISSING=1028 +RNGP_EXCEPT_MULTIPLE=1029 +RNGP_EXCEPT_NO_CONTENT=1030 +RNGP_EXTERNALREF_EMTPY=1031 +RNGP_EXTERNAL_REF_FAILURE=1032 +RNGP_EXTERNALREF_RECURSE=1033 +RNGP_FORBIDDEN_ATTRIBUTE=1034 +RNGP_FOREIGN_ELEMENT=1035 +RNGP_GRAMMAR_CONTENT=1036 +RNGP_GRAMMAR_EMPTY=1037 +RNGP_GRAMMAR_MISSING=1038 +RNGP_GRAMMAR_NO_START=1039 +RNGP_GROUP_ATTR_CONFLICT=1040 +RNGP_HREF_ERROR=1041 +RNGP_INCLUDE_EMPTY=1042 +RNGP_INCLUDE_FAILURE=1043 +RNGP_INCLUDE_RECURSE=1044 +RNGP_INTERLEAVE_ADD=1045 +RNGP_INTERLEAVE_CREATE_FAILED=1046 +RNGP_INTERLEAVE_EMPTY=1047 +RNGP_INTERLEAVE_NO_CONTENT=1048 +RNGP_INVALID_DEFINE_NAME=1049 +RNGP_INVALID_URI=1050 +RNGP_INVALID_VALUE=1051 +RNGP_MISSING_HREF=1052 +RNGP_NAME_MISSING=1053 +RNGP_NEED_COMBINE=1054 +RNGP_NOTALLOWED_NOT_EMPTY=1055 +RNGP_NSNAME_ATTR_ANCESTOR=1056 +RNGP_NSNAME_NO_NS=1057 +RNGP_PARAM_FORBIDDEN=1058 +RNGP_PARAM_NAME_MISSING=1059 +RNGP_PARENTREF_CREATE_FAILED=1060 +RNGP_PARENTREF_NAME_INVALID=1061 +RNGP_PARENTREF_NO_NAME=1062 +RNGP_PARENTREF_NO_PARENT=1063 +RNGP_PARENTREF_NOT_EMPTY=1064 +RNGP_PARSE_ERROR=1065 +RNGP_PAT_ANYNAME_EXCEPT_ANYNAME=1066 +RNGP_PAT_ATTR_ATTR=1067 +RNGP_PAT_ATTR_ELEM=1068 +RNGP_PAT_DATA_EXCEPT_ATTR=1069 +RNGP_PAT_DATA_EXCEPT_ELEM=1070 +RNGP_PAT_DATA_EXCEPT_EMPTY=1071 +RNGP_PAT_DATA_EXCEPT_GROUP=1072 +RNGP_PAT_DATA_EXCEPT_INTERLEAVE=1073 +RNGP_PAT_DATA_EXCEPT_LIST=1074 +RNGP_PAT_DATA_EXCEPT_ONEMORE=1075 +RNGP_PAT_DATA_EXCEPT_REF=1076 +RNGP_PAT_DATA_EXCEPT_TEXT=1077 +RNGP_PAT_LIST_ATTR=1078 +RNGP_PAT_LIST_ELEM=1079 +RNGP_PAT_LIST_INTERLEAVE=1080 +RNGP_PAT_LIST_LIST=1081 +RNGP_PAT_LIST_REF=1082 +RNGP_PAT_LIST_TEXT=1083 +RNGP_PAT_NSNAME_EXCEPT_ANYNAME=1084 +RNGP_PAT_NSNAME_EXCEPT_NSNAME=1085 +RNGP_PAT_ONEMORE_GROUP_ATTR=1086 +RNGP_PAT_ONEMORE_INTERLEAVE_ATTR=1087 +RNGP_PAT_START_ATTR=1088 +RNGP_PAT_START_DATA=1089 +RNGP_PAT_START_EMPTY=1090 +RNGP_PAT_START_GROUP=1091 +RNGP_PAT_START_INTERLEAVE=1092 +RNGP_PAT_START_LIST=1093 +RNGP_PAT_START_ONEMORE=1094 +RNGP_PAT_START_TEXT=1095 +RNGP_PAT_START_VALUE=1096 +RNGP_PREFIX_UNDEFINED=1097 +RNGP_REF_CREATE_FAILED=1098 +RNGP_REF_CYCLE=1099 +RNGP_REF_NAME_INVALID=1100 +RNGP_REF_NO_DEF=1101 +RNGP_REF_NO_NAME=1102 +RNGP_REF_NOT_EMPTY=1103 +RNGP_START_CHOICE_AND_INTERLEAVE=1104 +RNGP_START_CONTENT=1105 +RNGP_START_EMPTY=1106 +RNGP_START_MISSING=1107 +RNGP_TEXT_EXPECTED=1108 +RNGP_TEXT_HAS_CHILD=1109 +RNGP_TYPE_MISSING=1110 +RNGP_TYPE_NOT_FOUND=1111 +RNGP_TYPE_VALUE=1112 +RNGP_UNKNOWN_ATTRIBUTE=1113 +RNGP_UNKNOWN_COMBINE=1114 +RNGP_UNKNOWN_CONSTRUCT=1115 +RNGP_UNKNOWN_TYPE_LIB=1116 +RNGP_URI_FRAGMENT=1117 +RNGP_URI_NOT_ABSOLUTE=1118 +RNGP_VALUE_EMPTY=1119 +RNGP_VALUE_NO_CONTENT=1120 +RNGP_XMLNS_NAME=1121 +RNGP_XML_NS=1122 +XPATH_EXPRESSION_OK=1200 +XPATH_NUMBER_ERROR=1201 +XPATH_UNFINISHED_LITERAL_ERROR=1202 +XPATH_START_LITERAL_ERROR=1203 +XPATH_VARIABLE_REF_ERROR=1204 +XPATH_UNDEF_VARIABLE_ERROR=1205 +XPATH_INVALID_PREDICATE_ERROR=1206 +XPATH_EXPR_ERROR=1207 +XPATH_UNCLOSED_ERROR=1208 +XPATH_UNKNOWN_FUNC_ERROR=1209 +XPATH_INVALID_OPERAND=1210 +XPATH_INVALID_TYPE=1211 +XPATH_INVALID_ARITY=1212 +XPATH_INVALID_CTXT_SIZE=1213 +XPATH_INVALID_CTXT_POSITION=1214 +XPATH_MEMORY_ERROR=1215 +XPTR_SYNTAX_ERROR=1216 +XPTR_RESOURCE_ERROR=1217 +XPTR_SUB_RESOURCE_ERROR=1218 +XPATH_UNDEF_PREFIX_ERROR=1219 +XPATH_ENCODING_ERROR=1220 +XPATH_INVALID_CHAR_ERROR=1221 +TREE_INVALID_HEX=1300 +TREE_INVALID_DEC=1301 +TREE_UNTERMINATED_ENTITY=1302 +TREE_NOT_UTF8=1303 +SAVE_NOT_UTF8=1400 +SAVE_CHAR_INVALID=1401 +SAVE_NO_DOCTYPE=1402 +SAVE_UNKNOWN_ENCODING=1403 +REGEXP_COMPILE_ERROR=1450 +IO_UNKNOWN=1500 +IO_EACCES=1501 +IO_EAGAIN=1502 +IO_EBADF=1503 +IO_EBADMSG=1504 +IO_EBUSY=1505 +IO_ECANCELED=1506 +IO_ECHILD=1507 +IO_EDEADLK=1508 +IO_EDOM=1509 +IO_EEXIST=1510 +IO_EFAULT=1511 +IO_EFBIG=1512 +IO_EINPROGRESS=1513 +IO_EINTR=1514 +IO_EINVAL=1515 +IO_EIO=1516 +IO_EISDIR=1517 +IO_EMFILE=1518 +IO_EMLINK=1519 +IO_EMSGSIZE=1520 +IO_ENAMETOOLONG=1521 +IO_ENFILE=1522 +IO_ENODEV=1523 +IO_ENOENT=1524 +IO_ENOEXEC=1525 +IO_ENOLCK=1526 +IO_ENOMEM=1527 +IO_ENOSPC=1528 +IO_ENOSYS=1529 +IO_ENOTDIR=1530 +IO_ENOTEMPTY=1531 +IO_ENOTSUP=1532 +IO_ENOTTY=1533 +IO_ENXIO=1534 +IO_EPERM=1535 +IO_EPIPE=1536 +IO_ERANGE=1537 +IO_EROFS=1538 +IO_ESPIPE=1539 +IO_ESRCH=1540 +IO_ETIMEDOUT=1541 +IO_EXDEV=1542 +IO_NETWORK_ATTEMPT=1543 +IO_ENCODER=1544 +IO_FLUSH=1545 +IO_WRITE=1546 +IO_NO_INPUT=1547 +IO_BUFFER_FULL=1548 +IO_LOAD_ERROR=1549 +IO_ENOTSOCK=1550 +IO_EISCONN=1551 +IO_ECONNREFUSED=1552 +IO_ENETUNREACH=1553 +IO_EADDRINUSE=1554 +IO_EALREADY=1555 +IO_EAFNOSUPPORT=1556 +XINCLUDE_RECURSION=1600 +XINCLUDE_PARSE_VALUE=1601 +XINCLUDE_ENTITY_DEF_MISMATCH=1602 +XINCLUDE_NO_HREF=1603 +XINCLUDE_NO_FALLBACK=1604 +XINCLUDE_HREF_URI=1605 +XINCLUDE_TEXT_FRAGMENT=1606 +XINCLUDE_TEXT_DOCUMENT=1607 +XINCLUDE_INVALID_CHAR=1608 +XINCLUDE_BUILD_FAILED=1609 +XINCLUDE_UNKNOWN_ENCODING=1610 +XINCLUDE_MULTIPLE_ROOT=1611 +XINCLUDE_XPTR_FAILED=1612 +XINCLUDE_XPTR_RESULT=1613 +XINCLUDE_INCLUDE_IN_INCLUDE=1614 +XINCLUDE_FALLBACKS_IN_INCLUDE=1615 +XINCLUDE_FALLBACK_NOT_IN_INCLUDE=1616 +XINCLUDE_DEPRECATED_NS=1617 +XINCLUDE_FRAGMENT_ID=1618 +CATALOG_MISSING_ATTR=1650 +CATALOG_ENTRY_BROKEN=1651 +CATALOG_PREFER_VALUE=1652 +CATALOG_NOT_CATALOG=1653 +CATALOG_RECURSION=1654 +SCHEMAP_PREFIX_UNDEFINED=1700 +SCHEMAP_ATTRFORMDEFAULT_VALUE=1701 +SCHEMAP_ATTRGRP_NONAME_NOREF=1702 +SCHEMAP_ATTR_NONAME_NOREF=1703 +SCHEMAP_COMPLEXTYPE_NONAME_NOREF=1704 +SCHEMAP_ELEMFORMDEFAULT_VALUE=1705 +SCHEMAP_ELEM_NONAME_NOREF=1706 +SCHEMAP_EXTENSION_NO_BASE=1707 +SCHEMAP_FACET_NO_VALUE=1708 +SCHEMAP_FAILED_BUILD_IMPORT=1709 +SCHEMAP_GROUP_NONAME_NOREF=1710 +SCHEMAP_IMPORT_NAMESPACE_NOT_URI=1711 +SCHEMAP_IMPORT_REDEFINE_NSNAME=1712 +SCHEMAP_IMPORT_SCHEMA_NOT_URI=1713 +SCHEMAP_INVALID_BOOLEAN=1714 +SCHEMAP_INVALID_ENUM=1715 +SCHEMAP_INVALID_FACET=1716 +SCHEMAP_INVALID_FACET_VALUE=1717 +SCHEMAP_INVALID_MAXOCCURS=1718 +SCHEMAP_INVALID_MINOCCURS=1719 +SCHEMAP_INVALID_REF_AND_SUBTYPE=1720 +SCHEMAP_INVALID_WHITE_SPACE=1721 +SCHEMAP_NOATTR_NOREF=1722 +SCHEMAP_NOTATION_NO_NAME=1723 +SCHEMAP_NOTYPE_NOREF=1724 +SCHEMAP_REF_AND_SUBTYPE=1725 +SCHEMAP_RESTRICTION_NONAME_NOREF=1726 +SCHEMAP_SIMPLETYPE_NONAME=1727 +SCHEMAP_TYPE_AND_SUBTYPE=1728 +SCHEMAP_UNKNOWN_ALL_CHILD=1729 +SCHEMAP_UNKNOWN_ANYATTRIBUTE_CHILD=1730 +SCHEMAP_UNKNOWN_ATTR_CHILD=1731 +SCHEMAP_UNKNOWN_ATTRGRP_CHILD=1732 +SCHEMAP_UNKNOWN_ATTRIBUTE_GROUP=1733 +SCHEMAP_UNKNOWN_BASE_TYPE=1734 +SCHEMAP_UNKNOWN_CHOICE_CHILD=1735 +SCHEMAP_UNKNOWN_COMPLEXCONTENT_CHILD=1736 +SCHEMAP_UNKNOWN_COMPLEXTYPE_CHILD=1737 +SCHEMAP_UNKNOWN_ELEM_CHILD=1738 +SCHEMAP_UNKNOWN_EXTENSION_CHILD=1739 +SCHEMAP_UNKNOWN_FACET_CHILD=1740 +SCHEMAP_UNKNOWN_FACET_TYPE=1741 +SCHEMAP_UNKNOWN_GROUP_CHILD=1742 +SCHEMAP_UNKNOWN_IMPORT_CHILD=1743 +SCHEMAP_UNKNOWN_LIST_CHILD=1744 +SCHEMAP_UNKNOWN_NOTATION_CHILD=1745 +SCHEMAP_UNKNOWN_PROCESSCONTENT_CHILD=1746 +SCHEMAP_UNKNOWN_REF=1747 +SCHEMAP_UNKNOWN_RESTRICTION_CHILD=1748 +SCHEMAP_UNKNOWN_SCHEMAS_CHILD=1749 +SCHEMAP_UNKNOWN_SEQUENCE_CHILD=1750 +SCHEMAP_UNKNOWN_SIMPLECONTENT_CHILD=1751 +SCHEMAP_UNKNOWN_SIMPLETYPE_CHILD=1752 +SCHEMAP_UNKNOWN_TYPE=1753 +SCHEMAP_UNKNOWN_UNION_CHILD=1754 +SCHEMAP_ELEM_DEFAULT_FIXED=1755 +SCHEMAP_REGEXP_INVALID=1756 +SCHEMAP_FAILED_LOAD=1757 +SCHEMAP_NOTHING_TO_PARSE=1758 +SCHEMAP_NOROOT=1759 +SCHEMAP_REDEFINED_GROUP=1760 +SCHEMAP_REDEFINED_TYPE=1761 +SCHEMAP_REDEFINED_ELEMENT=1762 +SCHEMAP_REDEFINED_ATTRGROUP=1763 +SCHEMAP_REDEFINED_ATTR=1764 +SCHEMAP_REDEFINED_NOTATION=1765 +SCHEMAP_FAILED_PARSE=1766 +SCHEMAP_UNKNOWN_PREFIX=1767 +SCHEMAP_DEF_AND_PREFIX=1768 +SCHEMAP_UNKNOWN_INCLUDE_CHILD=1769 +SCHEMAP_INCLUDE_SCHEMA_NOT_URI=1770 +SCHEMAP_INCLUDE_SCHEMA_NO_URI=1771 +SCHEMAP_NOT_SCHEMA=1772 +SCHEMAP_UNKNOWN_MEMBER_TYPE=1773 +SCHEMAP_INVALID_ATTR_USE=1774 +SCHEMAP_RECURSIVE=1775 +SCHEMAP_SUPERNUMEROUS_LIST_ITEM_TYPE=1776 +SCHEMAP_INVALID_ATTR_COMBINATION=1777 +SCHEMAP_INVALID_ATTR_INLINE_COMBINATION=1778 +SCHEMAP_MISSING_SIMPLETYPE_CHILD=1779 +SCHEMAP_INVALID_ATTR_NAME=1780 +SCHEMAP_REF_AND_CONTENT=1781 +SCHEMAP_CT_PROPS_CORRECT_1=1782 +SCHEMAP_CT_PROPS_CORRECT_2=1783 +SCHEMAP_CT_PROPS_CORRECT_3=1784 +SCHEMAP_CT_PROPS_CORRECT_4=1785 +SCHEMAP_CT_PROPS_CORRECT_5=1786 +SCHEMAP_DERIVATION_OK_RESTRICTION_1=1787 +SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_1=1788 +SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_2=1789 +SCHEMAP_DERIVATION_OK_RESTRICTION_2_2=1790 +SCHEMAP_DERIVATION_OK_RESTRICTION_3=1791 +SCHEMAP_WILDCARD_INVALID_NS_MEMBER=1792 +SCHEMAP_INTERSECTION_NOT_EXPRESSIBLE=1793 +SCHEMAP_UNION_NOT_EXPRESSIBLE=1794 +SCHEMAP_SRC_IMPORT_3_1=1795 +SCHEMAP_SRC_IMPORT_3_2=1796 +SCHEMAP_DERIVATION_OK_RESTRICTION_4_1=1797 +SCHEMAP_DERIVATION_OK_RESTRICTION_4_2=1798 +SCHEMAP_DERIVATION_OK_RESTRICTION_4_3=1799 +SCHEMAP_COS_CT_EXTENDS_1_3=1800 +SCHEMAV_NOROOT=1801 +SCHEMAV_UNDECLAREDELEM=1802 +SCHEMAV_NOTTOPLEVEL=1803 +SCHEMAV_MISSING=1804 +SCHEMAV_WRONGELEM=1805 +SCHEMAV_NOTYPE=1806 +SCHEMAV_NOROLLBACK=1807 +SCHEMAV_ISABSTRACT=1808 +SCHEMAV_NOTEMPTY=1809 +SCHEMAV_ELEMCONT=1810 +SCHEMAV_HAVEDEFAULT=1811 +SCHEMAV_NOTNILLABLE=1812 +SCHEMAV_EXTRACONTENT=1813 +SCHEMAV_INVALIDATTR=1814 +SCHEMAV_INVALIDELEM=1815 +SCHEMAV_NOTDETERMINIST=1816 +SCHEMAV_CONSTRUCT=1817 +SCHEMAV_INTERNAL=1818 +SCHEMAV_NOTSIMPLE=1819 +SCHEMAV_ATTRUNKNOWN=1820 +SCHEMAV_ATTRINVALID=1821 +SCHEMAV_VALUE=1822 +SCHEMAV_FACET=1823 +SCHEMAV_CVC_DATATYPE_VALID_1_2_1=1824 +SCHEMAV_CVC_DATATYPE_VALID_1_2_2=1825 +SCHEMAV_CVC_DATATYPE_VALID_1_2_3=1826 +SCHEMAV_CVC_TYPE_3_1_1=1827 +SCHEMAV_CVC_TYPE_3_1_2=1828 +SCHEMAV_CVC_FACET_VALID=1829 +SCHEMAV_CVC_LENGTH_VALID=1830 +SCHEMAV_CVC_MINLENGTH_VALID=1831 +SCHEMAV_CVC_MAXLENGTH_VALID=1832 +SCHEMAV_CVC_MININCLUSIVE_VALID=1833 +SCHEMAV_CVC_MAXINCLUSIVE_VALID=1834 +SCHEMAV_CVC_MINEXCLUSIVE_VALID=1835 +SCHEMAV_CVC_MAXEXCLUSIVE_VALID=1836 +SCHEMAV_CVC_TOTALDIGITS_VALID=1837 +SCHEMAV_CVC_FRACTIONDIGITS_VALID=1838 +SCHEMAV_CVC_PATTERN_VALID=1839 +SCHEMAV_CVC_ENUMERATION_VALID=1840 +SCHEMAV_CVC_COMPLEX_TYPE_2_1=1841 +SCHEMAV_CVC_COMPLEX_TYPE_2_2=1842 +SCHEMAV_CVC_COMPLEX_TYPE_2_3=1843 +SCHEMAV_CVC_COMPLEX_TYPE_2_4=1844 +SCHEMAV_CVC_ELT_1=1845 +SCHEMAV_CVC_ELT_2=1846 +SCHEMAV_CVC_ELT_3_1=1847 +SCHEMAV_CVC_ELT_3_2_1=1848 +SCHEMAV_CVC_ELT_3_2_2=1849 +SCHEMAV_CVC_ELT_4_1=1850 +SCHEMAV_CVC_ELT_4_2=1851 +SCHEMAV_CVC_ELT_4_3=1852 +SCHEMAV_CVC_ELT_5_1_1=1853 +SCHEMAV_CVC_ELT_5_1_2=1854 +SCHEMAV_CVC_ELT_5_2_1=1855 +SCHEMAV_CVC_ELT_5_2_2_1=1856 +SCHEMAV_CVC_ELT_5_2_2_2_1=1857 +SCHEMAV_CVC_ELT_5_2_2_2_2=1858 +SCHEMAV_CVC_ELT_6=1859 +SCHEMAV_CVC_ELT_7=1860 +SCHEMAV_CVC_ATTRIBUTE_1=1861 +SCHEMAV_CVC_ATTRIBUTE_2=1862 +SCHEMAV_CVC_ATTRIBUTE_3=1863 +SCHEMAV_CVC_ATTRIBUTE_4=1864 +SCHEMAV_CVC_COMPLEX_TYPE_3_1=1865 +SCHEMAV_CVC_COMPLEX_TYPE_3_2_1=1866 +SCHEMAV_CVC_COMPLEX_TYPE_3_2_2=1867 +SCHEMAV_CVC_COMPLEX_TYPE_4=1868 +SCHEMAV_CVC_COMPLEX_TYPE_5_1=1869 +SCHEMAV_CVC_COMPLEX_TYPE_5_2=1870 +SCHEMAV_ELEMENT_CONTENT=1871 +SCHEMAV_DOCUMENT_ELEMENT_MISSING=1872 +SCHEMAV_CVC_COMPLEX_TYPE_1=1873 +SCHEMAV_CVC_AU=1874 +SCHEMAV_CVC_TYPE_1=1875 +SCHEMAV_CVC_TYPE_2=1876 +SCHEMAV_CVC_IDC=1877 +SCHEMAV_CVC_WILDCARD=1878 +SCHEMAV_MISC=1879 +XPTR_UNKNOWN_SCHEME=1900 +XPTR_CHILDSEQ_START=1901 +XPTR_EVAL_FAILED=1902 +XPTR_EXTRA_OBJECTS=1903 +C14N_CREATE_CTXT=1950 +C14N_REQUIRES_UTF8=1951 +C14N_CREATE_STACK=1952 +C14N_INVALID_NODE=1953 +C14N_UNKNOW_NODE=1954 +C14N_RELATIVE_NAMESPACE=1955 +FTP_PASV_ANSWER=2000 +FTP_EPSV_ANSWER=2001 +FTP_ACCNT=2002 +FTP_URL_SYNTAX=2003 +HTTP_URL_SYNTAX=2020 +HTTP_USE_IP=2021 +HTTP_UNKNOWN_HOST=2022 +SCHEMAP_SRC_SIMPLE_TYPE_1=3000 +SCHEMAP_SRC_SIMPLE_TYPE_2=3001 +SCHEMAP_SRC_SIMPLE_TYPE_3=3002 +SCHEMAP_SRC_SIMPLE_TYPE_4=3003 +SCHEMAP_SRC_RESOLVE=3004 +SCHEMAP_SRC_RESTRICTION_BASE_OR_SIMPLETYPE=3005 +SCHEMAP_SRC_LIST_ITEMTYPE_OR_SIMPLETYPE=3006 +SCHEMAP_SRC_UNION_MEMBERTYPES_OR_SIMPLETYPES=3007 +SCHEMAP_ST_PROPS_CORRECT_1=3008 +SCHEMAP_ST_PROPS_CORRECT_2=3009 +SCHEMAP_ST_PROPS_CORRECT_3=3010 +SCHEMAP_COS_ST_RESTRICTS_1_1=3011 +SCHEMAP_COS_ST_RESTRICTS_1_2=3012 +SCHEMAP_COS_ST_RESTRICTS_1_3_1=3013 +SCHEMAP_COS_ST_RESTRICTS_1_3_2=3014 +SCHEMAP_COS_ST_RESTRICTS_2_1=3015 +SCHEMAP_COS_ST_RESTRICTS_2_3_1_1=3016 +SCHEMAP_COS_ST_RESTRICTS_2_3_1_2=3017 +SCHEMAP_COS_ST_RESTRICTS_2_3_2_1=3018 +SCHEMAP_COS_ST_RESTRICTS_2_3_2_2=3019 +SCHEMAP_COS_ST_RESTRICTS_2_3_2_3=3020 +SCHEMAP_COS_ST_RESTRICTS_2_3_2_4=3021 +SCHEMAP_COS_ST_RESTRICTS_2_3_2_5=3022 +SCHEMAP_COS_ST_RESTRICTS_3_1=3023 +SCHEMAP_COS_ST_RESTRICTS_3_3_1=3024 +SCHEMAP_COS_ST_RESTRICTS_3_3_1_2=3025 +SCHEMAP_COS_ST_RESTRICTS_3_3_2_2=3026 +SCHEMAP_COS_ST_RESTRICTS_3_3_2_1=3027 +SCHEMAP_COS_ST_RESTRICTS_3_3_2_3=3028 +SCHEMAP_COS_ST_RESTRICTS_3_3_2_4=3029 +SCHEMAP_COS_ST_RESTRICTS_3_3_2_5=3030 +SCHEMAP_COS_ST_DERIVED_OK_2_1=3031 +SCHEMAP_COS_ST_DERIVED_OK_2_2=3032 +SCHEMAP_S4S_ELEM_NOT_ALLOWED=3033 +SCHEMAP_S4S_ELEM_MISSING=3034 +SCHEMAP_S4S_ATTR_NOT_ALLOWED=3035 +SCHEMAP_S4S_ATTR_MISSING=3036 +SCHEMAP_S4S_ATTR_INVALID_VALUE=3037 +SCHEMAP_SRC_ELEMENT_1=3038 +SCHEMAP_SRC_ELEMENT_2_1=3039 +SCHEMAP_SRC_ELEMENT_2_2=3040 +SCHEMAP_SRC_ELEMENT_3=3041 +SCHEMAP_P_PROPS_CORRECT_1=3042 +SCHEMAP_P_PROPS_CORRECT_2_1=3043 +SCHEMAP_P_PROPS_CORRECT_2_2=3044 +SCHEMAP_E_PROPS_CORRECT_2=3045 +SCHEMAP_E_PROPS_CORRECT_3=3046 +SCHEMAP_E_PROPS_CORRECT_4=3047 +SCHEMAP_E_PROPS_CORRECT_5=3048 +SCHEMAP_E_PROPS_CORRECT_6=3049 +SCHEMAP_SRC_INCLUDE=3050 +SCHEMAP_SRC_ATTRIBUTE_1=3051 +SCHEMAP_SRC_ATTRIBUTE_2=3052 +SCHEMAP_SRC_ATTRIBUTE_3_1=3053 +SCHEMAP_SRC_ATTRIBUTE_3_2=3054 +SCHEMAP_SRC_ATTRIBUTE_4=3055 +SCHEMAP_NO_XMLNS=3056 +SCHEMAP_NO_XSI=3057 +SCHEMAP_COS_VALID_DEFAULT_1=3058 +SCHEMAP_COS_VALID_DEFAULT_2_1=3059 +SCHEMAP_COS_VALID_DEFAULT_2_2_1=3060 +SCHEMAP_COS_VALID_DEFAULT_2_2_2=3061 +SCHEMAP_CVC_SIMPLE_TYPE=3062 +SCHEMAP_COS_CT_EXTENDS_1_1=3063 +SCHEMAP_SRC_IMPORT_1_1=3064 +SCHEMAP_SRC_IMPORT_1_2=3065 +SCHEMAP_SRC_IMPORT_2=3066 +SCHEMAP_SRC_IMPORT_2_1=3067 +SCHEMAP_SRC_IMPORT_2_2=3068 +SCHEMAP_INTERNAL=3069 +SCHEMAP_NOT_DETERMINISTIC=3070 +SCHEMAP_SRC_ATTRIBUTE_GROUP_1=3071 +SCHEMAP_SRC_ATTRIBUTE_GROUP_2=3072 +SCHEMAP_SRC_ATTRIBUTE_GROUP_3=3073 +SCHEMAP_MG_PROPS_CORRECT_1=3074 +SCHEMAP_MG_PROPS_CORRECT_2=3075 +SCHEMAP_SRC_CT_1=3076 +SCHEMAP_DERIVATION_OK_RESTRICTION_2_1_3=3077 +SCHEMAP_AU_PROPS_CORRECT_2=3078 +SCHEMAP_A_PROPS_CORRECT_2=3079 +SCHEMAP_C_PROPS_CORRECT=3080 +SCHEMAP_SRC_REDEFINE=3081 +SCHEMAP_SRC_IMPORT=3082 +SCHEMAP_WARN_SKIP_SCHEMA=3083 +SCHEMAP_WARN_UNLOCATED_SCHEMA=3084 +SCHEMAP_WARN_ATTR_REDECL_PROH=3085 +SCHEMAP_WARN_ATTR_POINTLESS_PROH=3086 +SCHEMAP_AG_PROPS_CORRECT=3087 +SCHEMAP_COS_CT_EXTENDS_1_2=3088 +SCHEMAP_AU_PROPS_CORRECT=3089 +SCHEMAP_A_PROPS_CORRECT_3=3090 +SCHEMAP_COS_ALL_LIMITED=3091 +SCHEMATRONV_ASSERT=4000 +SCHEMATRONV_REPORT=4001 +MODULE_OPEN=4900 +MODULE_CLOSE=4901 +CHECK_FOUND_ELEMENT=5000 +CHECK_FOUND_ATTRIBUTE=5001 +CHECK_FOUND_TEXT=5002 +CHECK_FOUND_CDATA=5003 +CHECK_FOUND_ENTITYREF=5004 +CHECK_FOUND_ENTITY=5005 +CHECK_FOUND_PI=5006 +CHECK_FOUND_COMMENT=5007 +CHECK_FOUND_DOCTYPE=5008 +CHECK_FOUND_FRAGMENT=5009 +CHECK_FOUND_NOTATION=5010 +CHECK_UNKNOWN_NODE=5011 +CHECK_ENTITY_TYPE=5012 +CHECK_NO_PARENT=5013 +CHECK_NO_DOC=5014 +CHECK_NO_NAME=5015 +CHECK_NO_ELEM=5016 +CHECK_WRONG_DOC=5017 +CHECK_NO_PREV=5018 +CHECK_WRONG_PREV=5019 +CHECK_NO_NEXT=5020 +CHECK_WRONG_NEXT=5021 +CHECK_NOT_DTD=5022 +CHECK_NOT_ATTR=5023 +CHECK_NOT_ATTR_DECL=5024 +CHECK_NOT_ELEM_DECL=5025 +CHECK_NOT_ENTITY_DECL=5026 +CHECK_NOT_NS_DECL=5027 +CHECK_NO_HREF=5028 +CHECK_WRONG_PARENT=5029 +CHECK_NS_SCOPE=5030 +CHECK_NS_ANCESTOR=5031 +CHECK_NOT_UTF8=5032 +CHECK_NO_DICT=5033 +CHECK_NOT_NCNAME=5034 +CHECK_OUTSIDE_DICT=5035 +CHECK_WRONG_NAME=5036 +CHECK_NAME_NOT_NULL=5037 +I18N_NO_NAME=6000 +I18N_NO_HANDLER=6001 +I18N_EXCESS_HANDLER=6002 +I18N_CONV_FAILED=6003 +I18N_NO_OUTPUT=6004 +BUF_OVERFLOW=7000 +""" + +cdef object __RELAXNG_ERROR_TYPES = """\ +RELAXNG_OK=0 +RELAXNG_ERR_MEMORY=1 +RELAXNG_ERR_TYPE=2 +RELAXNG_ERR_TYPEVAL=3 +RELAXNG_ERR_DUPID=4 +RELAXNG_ERR_TYPECMP=5 +RELAXNG_ERR_NOSTATE=6 +RELAXNG_ERR_NODEFINE=7 +RELAXNG_ERR_LISTEXTRA=8 +RELAXNG_ERR_LISTEMPTY=9 +RELAXNG_ERR_INTERNODATA=10 +RELAXNG_ERR_INTERSEQ=11 +RELAXNG_ERR_INTEREXTRA=12 +RELAXNG_ERR_ELEMNAME=13 +RELAXNG_ERR_ATTRNAME=14 +RELAXNG_ERR_ELEMNONS=15 +RELAXNG_ERR_ATTRNONS=16 +RELAXNG_ERR_ELEMWRONGNS=17 +RELAXNG_ERR_ATTRWRONGNS=18 +RELAXNG_ERR_ELEMEXTRANS=19 +RELAXNG_ERR_ATTREXTRANS=20 +RELAXNG_ERR_ELEMNOTEMPTY=21 +RELAXNG_ERR_NOELEM=22 +RELAXNG_ERR_NOTELEM=23 +RELAXNG_ERR_ATTRVALID=24 +RELAXNG_ERR_CONTENTVALID=25 +RELAXNG_ERR_EXTRACONTENT=26 +RELAXNG_ERR_INVALIDATTR=27 +RELAXNG_ERR_DATAELEM=28 +RELAXNG_ERR_VALELEM=29 +RELAXNG_ERR_LISTELEM=30 +RELAXNG_ERR_DATATYPE=31 +RELAXNG_ERR_VALUE=32 +RELAXNG_ERR_LIST=33 +RELAXNG_ERR_NOGRAMMAR=34 +RELAXNG_ERR_EXTRADATA=35 +RELAXNG_ERR_LACKDATA=36 +RELAXNG_ERR_INTERNAL=37 +RELAXNG_ERR_ELEMWRONG=38 +RELAXNG_ERR_TEXTWRONG=39 +""" +# --- END: GENERATED CONSTANTS --- + +__initErrorConstants() diff --git a/.venv/lib/python3.12/site-packages/lxml/xmlid.pxi b/.venv/lib/python3.12/site-packages/lxml/xmlid.pxi new file mode 100644 index 00000000..1531f6d9 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/xmlid.pxi @@ -0,0 +1,179 @@ +cdef object _find_id_attributes + +def XMLID(text, parser=None, *, base_url=None): + """XMLID(text, parser=None, base_url=None) + + Parse the text and return a tuple (root node, ID dictionary). The root + node is the same as returned by the XML() function. The dictionary + contains string-element pairs. The dictionary keys are the values of 'id' + attributes. The elements referenced by the ID are stored as dictionary + values. + """ + cdef dict dic + global _find_id_attributes + if _find_id_attributes is None: + _find_id_attributes = XPath('//*[string(@id)]') + + # ElementTree compatible implementation: parse and look for 'id' attributes + root = XML(text, parser, base_url=base_url) + dic = {} + for elem in _find_id_attributes(root): + dic[elem.get('id')] = elem + return root, dic + +def XMLDTDID(text, parser=None, *, base_url=None): + """XMLDTDID(text, parser=None, base_url=None) + + Parse the text and return a tuple (root node, ID dictionary). The root + node is the same as returned by the XML() function. The dictionary + contains string-element pairs. The dictionary keys are the values of ID + attributes as defined by the DTD. The elements referenced by the ID are + stored as dictionary values. + + Note that you must not modify the XML tree if you use the ID dictionary. + The results are undefined. + """ + cdef _Element root + root = XML(text, parser, base_url=base_url) + # xml:id spec compatible implementation: use DTD ID attributes from libxml2 + if root._doc._c_doc.ids is NULL: + return root, {} + else: + return root, _IDDict(root) + +def parseid(source, parser=None, *, base_url=None): + """parseid(source, parser=None) + + Parses the source into a tuple containing an ElementTree object and an + ID dictionary. If no parser is provided as second argument, the default + parser is used. + + Note that you must not modify the XML tree if you use the ID dictionary. + The results are undefined. + """ + cdef _Document doc + doc = _parseDocument(source, parser, base_url) + return _elementTreeFactory(doc, None), _IDDict(doc) + +cdef class _IDDict: + """IDDict(self, etree) + A dictionary-like proxy class that mapps ID attributes to elements. + + The dictionary must be instantiated with the root element of a parsed XML + document, otherwise the behaviour is undefined. Elements and XML trees + that were created or modified 'by hand' are not supported. + """ + cdef _Document _doc + cdef object _keys + cdef object _items + def __cinit__(self, etree): + cdef _Document doc + doc = _documentOrRaise(etree) + if doc._c_doc.ids is NULL: + raise ValueError, "No ID dictionary available." + self._doc = doc + self._keys = None + self._items = None + + def copy(self): + return _IDDict(self._doc) + + def __getitem__(self, id_name): + cdef tree.xmlHashTable* c_ids + cdef tree.xmlID* c_id + cdef xmlAttr* c_attr + c_ids = self._doc._c_doc.ids + id_utf = _utf8(id_name) + c_id = <tree.xmlID*>tree.xmlHashLookup(c_ids, _xcstr(id_utf)) + if c_id is NULL: + raise KeyError, "key not found." + c_attr = c_id.attr + if c_attr is NULL or c_attr.parent is NULL: + raise KeyError, "ID attribute not found." + return _elementFactory(self._doc, c_attr.parent) + + def get(self, id_name): + return self[id_name] + + def __contains__(self, id_name): + cdef tree.xmlID* c_id + id_utf = _utf8(id_name) + c_id = <tree.xmlID*>tree.xmlHashLookup( + self._doc._c_doc.ids, _xcstr(id_utf)) + return c_id is not NULL + + def has_key(self, id_name): + return id_name in self + + def __repr__(self): + return repr(dict(self)) + + def keys(self): + if self._keys is None: + self._keys = self._build_keys() + return self._keys[:] + + def __iter__(self): + if self._keys is None: + self._keys = self._build_keys() + return iter(self._keys) + + def iterkeys(self): + return self + + def __len__(self): + if self._keys is None: + self._keys = self._build_keys() + return len(self._keys) + + def items(self): + if self._items is None: + self._items = self._build_items() + return self._items[:] + + def iteritems(self): + if self._items is None: + self._items = self._build_items() + return iter(self._items) + + def values(self): + cdef list values = [] + if self._items is None: + self._items = self._build_items() + for item in self._items: + value = python.PyTuple_GET_ITEM(item, 1) + python.Py_INCREF(value) + values.append(value) + return values + + def itervalues(self): + return iter(self.values()) + + cdef object _build_keys(self): + keys = [] + tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids, + <tree.xmlHashScanner>_collectIdHashKeys, <python.PyObject*>keys) + return keys + + cdef object _build_items(self): + items = [] + context = (items, self._doc) + tree.xmlHashScan(<tree.xmlHashTable*>self._doc._c_doc.ids, + <tree.xmlHashScanner>_collectIdHashItemList, <python.PyObject*>context) + return items + +cdef void _collectIdHashItemList(void* payload, void* context, xmlChar* name) noexcept: + # collect elements from ID attribute hash table + cdef list lst + c_id = <tree.xmlID*>payload + if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL: + return + lst, doc = <tuple>context + element = _elementFactory(doc, c_id.attr.parent) + lst.append( (funicode(name), element) ) + +cdef void _collectIdHashKeys(void* payload, void* collect_list, xmlChar* name) noexcept: + c_id = <tree.xmlID*>payload + if c_id is NULL or c_id.attr is NULL or c_id.attr.parent is NULL: + return + (<list>collect_list).append(funicode(name)) diff --git a/.venv/lib/python3.12/site-packages/lxml/xmlschema.pxi b/.venv/lib/python3.12/site-packages/lxml/xmlschema.pxi new file mode 100644 index 00000000..ac5f9587 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/xmlschema.pxi @@ -0,0 +1,215 @@ +# support for XMLSchema validation +from lxml.includes cimport xmlschema + + +cdef class XMLSchemaError(LxmlError): + """Base class of all XML Schema errors + """ + +cdef class XMLSchemaParseError(XMLSchemaError): + """Error while parsing an XML document as XML Schema. + """ + +cdef class XMLSchemaValidateError(XMLSchemaError): + """Error while validating an XML document with an XML Schema. + """ + + +################################################################################ +# XMLSchema + +cdef XPath _check_for_default_attributes = XPath( + "boolean(//xs:attribute[@default or @fixed][1])", + namespaces={'xs': 'http://www.w3.org/2001/XMLSchema'}) + + +cdef class XMLSchema(_Validator): + """XMLSchema(self, etree=None, file=None) + Turn a document into an XML Schema validator. + + Either pass a schema as Element or ElementTree, or pass a file or + filename through the ``file`` keyword argument. + + Passing the ``attribute_defaults`` boolean option will make the + schema insert default/fixed attributes into validated documents. + """ + cdef xmlschema.xmlSchema* _c_schema + cdef _Document _doc + cdef bint _has_default_attributes + cdef bint _add_attribute_defaults + + def __cinit__(self): + self._has_default_attributes = True # play it safe + self._add_attribute_defaults = False + + def __init__(self, etree=None, *, file=None, bint attribute_defaults=False): + cdef xmlschema.xmlSchemaParserCtxt* parser_ctxt + cdef xmlDoc* c_doc + + self._add_attribute_defaults = attribute_defaults + _Validator.__init__(self) + c_doc = NULL + if etree is not None: + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + c_doc = _copyDocRoot(doc._c_doc, root_node._c_node) + self._doc = _documentFactory(c_doc, doc._parser) + parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(c_doc) + elif file is not None: + file = _getFSPathOrObject(file) + if _isString(file): + filename = _encodeFilename(file) + parser_ctxt = xmlschema.xmlSchemaNewParserCtxt(_cstr(filename)) + else: + self._doc = _parseDocument(file, None, None) + parser_ctxt = xmlschema.xmlSchemaNewDocParserCtxt(self._doc._c_doc) + else: + raise XMLSchemaParseError, "No tree or file given" + + if parser_ctxt is NULL: + raise MemoryError() + + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + xmlschema.xmlSchemaSetParserStructuredErrors( + parser_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log) + if self._doc is not None: + # calling xmlSchemaParse on a schema with imports or + # includes will cause libxml2 to create an internal + # context for parsing, so push an implied context to route + # resolve requests to the document's parser + __GLOBAL_PARSER_CONTEXT.pushImpliedContextFromParser(self._doc._parser) + with nogil: + orig_loader = _register_document_loader() + self._c_schema = xmlschema.xmlSchemaParse(parser_ctxt) + _reset_document_loader(orig_loader) + if self._doc is not None: + __GLOBAL_PARSER_CONTEXT.popImpliedContext() + xmlschema.xmlSchemaFreeParserCtxt(parser_ctxt) + + if self._c_schema is NULL: + raise XMLSchemaParseError( + self._error_log._buildExceptionMessage( + "Document is not valid XML Schema"), + self._error_log) + + if self._doc is not None: + self._has_default_attributes = _check_for_default_attributes(self._doc) + self._add_attribute_defaults = attribute_defaults and self._has_default_attributes + + def __dealloc__(self): + xmlschema.xmlSchemaFree(self._c_schema) + + def __call__(self, etree): + """__call__(self, etree) + + Validate doc using XML Schema. + + Returns true if document is valid, false if not. + """ + cdef xmlschema.xmlSchemaValidCtxt* valid_ctxt + cdef _Document doc + cdef _Element root_node + cdef xmlDoc* c_doc + cdef int ret + + assert self._c_schema is not NULL, "Schema instance not initialised" + doc = _documentOrRaise(etree) + root_node = _rootNodeOrRaise(etree) + + valid_ctxt = xmlschema.xmlSchemaNewValidCtxt(self._c_schema) + if valid_ctxt is NULL: + raise MemoryError() + + try: + if self._add_attribute_defaults: + xmlschema.xmlSchemaSetValidOptions( + valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE) + + self._error_log.clear() + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + xmlschema.xmlSchemaSetValidStructuredErrors( + valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log) + + c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node) + with nogil: + ret = xmlschema.xmlSchemaValidateDoc(valid_ctxt, c_doc) + _destroyFakeDoc(doc._c_doc, c_doc) + finally: + xmlschema.xmlSchemaFreeValidCtxt(valid_ctxt) + + if ret == -1: + raise XMLSchemaValidateError( + "Internal error in XML Schema validation.", + self._error_log) + if ret == 0: + return True + else: + return False + + cdef _ParserSchemaValidationContext _newSaxValidator( + self, bint add_default_attributes): + cdef _ParserSchemaValidationContext context + context = _ParserSchemaValidationContext.__new__(_ParserSchemaValidationContext) + context._schema = self + context._add_default_attributes = (self._has_default_attributes and ( + add_default_attributes or self._add_attribute_defaults)) + return context + +@cython.final +@cython.internal +cdef class _ParserSchemaValidationContext: + cdef XMLSchema _schema + cdef xmlschema.xmlSchemaValidCtxt* _valid_ctxt + cdef xmlschema.xmlSchemaSAXPlugStruct* _sax_plug + cdef bint _add_default_attributes + def __cinit__(self): + self._valid_ctxt = NULL + self._sax_plug = NULL + self._add_default_attributes = False + + def __dealloc__(self): + self.disconnect() + if self._valid_ctxt: + xmlschema.xmlSchemaFreeValidCtxt(self._valid_ctxt) + + cdef _ParserSchemaValidationContext copy(self): + assert self._schema is not None, "_ParserSchemaValidationContext not initialised" + return self._schema._newSaxValidator( + self._add_default_attributes) + + cdef void inject_default_attributes(self, xmlDoc* c_doc) noexcept: + # we currently need to insert default attributes manually + # after parsing, as libxml2 does not support this at parse + # time + if self._add_default_attributes: + with nogil: + xmlschema.xmlSchemaValidateDoc(self._valid_ctxt, c_doc) + + cdef int connect(self, xmlparser.xmlParserCtxt* c_ctxt, _BaseErrorLog error_log) except -1: + if self._valid_ctxt is NULL: + self._valid_ctxt = xmlschema.xmlSchemaNewValidCtxt( + self._schema._c_schema) + if self._valid_ctxt is NULL: + raise MemoryError() + if self._add_default_attributes: + xmlschema.xmlSchemaSetValidOptions( + self._valid_ctxt, xmlschema.XML_SCHEMA_VAL_VC_I_CREATE) + if error_log is not None: + # Need a cast here because older libxml2 releases do not use 'const' in the functype. + xmlschema.xmlSchemaSetValidStructuredErrors( + self._valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>error_log) + self._sax_plug = xmlschema.xmlSchemaSAXPlug( + self._valid_ctxt, &c_ctxt.sax, &c_ctxt.userData) + + cdef void disconnect(self) noexcept: + if self._sax_plug is not NULL: + xmlschema.xmlSchemaSAXUnplug(self._sax_plug) + self._sax_plug = NULL + if self._valid_ctxt is not NULL: + xmlschema.xmlSchemaSetValidStructuredErrors( + self._valid_ctxt, NULL, NULL) + + cdef bint isvalid(self) noexcept: + if self._valid_ctxt is NULL: + return 1 # valid + return xmlschema.xmlSchemaIsValid(self._valid_ctxt) diff --git a/.venv/lib/python3.12/site-packages/lxml/xpath.pxi b/.venv/lib/python3.12/site-packages/lxml/xpath.pxi new file mode 100644 index 00000000..352f6313 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/xpath.pxi @@ -0,0 +1,487 @@ +# XPath evaluation + +class XPathSyntaxError(LxmlSyntaxError, XPathError): + pass + +################################################################################ +# XPath + +cdef object _XPATH_SYNTAX_ERRORS = ( + xmlerror.XML_XPATH_NUMBER_ERROR, + xmlerror.XML_XPATH_UNFINISHED_LITERAL_ERROR, + xmlerror.XML_XPATH_VARIABLE_REF_ERROR, + xmlerror.XML_XPATH_INVALID_PREDICATE_ERROR, + xmlerror.XML_XPATH_UNCLOSED_ERROR, + xmlerror.XML_XPATH_INVALID_CHAR_ERROR +) + +cdef object _XPATH_EVAL_ERRORS = ( + xmlerror.XML_XPATH_UNDEF_VARIABLE_ERROR, + xmlerror.XML_XPATH_UNDEF_PREFIX_ERROR, + xmlerror.XML_XPATH_UNKNOWN_FUNC_ERROR, + xmlerror.XML_XPATH_INVALID_OPERAND, + xmlerror.XML_XPATH_INVALID_TYPE, + xmlerror.XML_XPATH_INVALID_ARITY, + xmlerror.XML_XPATH_INVALID_CTXT_SIZE, + xmlerror.XML_XPATH_INVALID_CTXT_POSITION +) + +cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf) noexcept: + if ns_utf is None: + return xpath.xmlXPathRegisterFunc( + <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf), + _xpath_function_call) + else: + return xpath.xmlXPathRegisterFuncNS( + <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf), + _xpath_function_call) + +cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf) noexcept: + if ns_utf is None: + return xpath.xmlXPathRegisterFunc( + <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf), NULL) + else: + return xpath.xmlXPathRegisterFuncNS( + <xpath.xmlXPathContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf), NULL) + + +@cython.final +@cython.internal +cdef class _XPathContext(_BaseContext): + cdef object _variables + def __init__(self, namespaces, extensions, error_log, enable_regexp, variables, + build_smart_strings): + self._variables = variables + _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp, + build_smart_strings) + + cdef set_context(self, xpath.xmlXPathContext* xpathCtxt): + self._set_xpath_context(xpathCtxt) + # This would be a good place to set up the XPath parser dict, but + # we cannot use the current thread dict as we do not know which + # thread will execute the XPath evaluator - so, no dict for now. + self.registerLocalNamespaces() + self.registerLocalFunctions(xpathCtxt, _register_xpath_function) + + cdef register_context(self, _Document doc): + self._register_context(doc) + self.registerGlobalNamespaces() + self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function) + self.registerExsltFunctions() + if self._variables is not None: + self.registerVariables(self._variables) + + cdef unregister_context(self): + self.unregisterGlobalFunctions( + self._xpathCtxt, _unregister_xpath_function) + self.unregisterGlobalNamespaces() + xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt) + self._cleanup_context() + + cdef void registerExsltFunctions(self) noexcept: + if xslt.LIBXSLT_VERSION < 10125: + # we'd only execute dummy functions anyway + return + tree.xmlHashScan( + self._xpathCtxt.nsHash, _registerExsltFunctionsForNamespaces, + self._xpathCtxt) + + cdef registerVariables(self, variable_dict): + for name, value in variable_dict.items(): + name_utf = self._to_utf(name) + xpath.xmlXPathRegisterVariable( + self._xpathCtxt, _xcstr(name_utf), _wrapXPathObject(value, None, None)) + + cdef registerVariable(self, name, value): + name_utf = self._to_utf(name) + xpath.xmlXPathRegisterVariable( + self._xpathCtxt, _xcstr(name_utf), _wrapXPathObject(value, None, None)) + + +cdef void _registerExsltFunctionsForNamespaces( + void* _c_href, void* _ctxt, const_xmlChar* c_prefix) noexcept: + c_href = <const_xmlChar*> _c_href + ctxt = <xpath.xmlXPathContext*> _ctxt + + if tree.xmlStrcmp(c_href, xslt.EXSLT_DATE_NAMESPACE) == 0: + xslt.exsltDateXpathCtxtRegister(ctxt, c_prefix) + elif tree.xmlStrcmp(c_href, xslt.EXSLT_SETS_NAMESPACE) == 0: + xslt.exsltSetsXpathCtxtRegister(ctxt, c_prefix) + elif tree.xmlStrcmp(c_href, xslt.EXSLT_MATH_NAMESPACE) == 0: + xslt.exsltMathXpathCtxtRegister(ctxt, c_prefix) + elif tree.xmlStrcmp(c_href, xslt.EXSLT_STRINGS_NAMESPACE) == 0: + xslt.exsltStrXpathCtxtRegister(ctxt, c_prefix) + + +cdef class _XPathEvaluatorBase: + cdef xpath.xmlXPathContext* _xpathCtxt + cdef _XPathContext _context + cdef python.PyThread_type_lock _eval_lock + cdef _ErrorLog _error_log + def __cinit__(self): + self._xpathCtxt = NULL + if config.ENABLE_THREADING: + self._eval_lock = python.PyThread_allocate_lock() + if self._eval_lock is NULL: + raise MemoryError() + self._error_log = _ErrorLog() + + def __init__(self, namespaces, extensions, enable_regexp, + smart_strings): + self._context = _XPathContext(namespaces, extensions, self._error_log, + enable_regexp, None, smart_strings) + + @property + def error_log(self): + assert self._error_log is not None, "XPath evaluator not initialised" + return self._error_log.copy() + + def __dealloc__(self): + if self._xpathCtxt is not NULL: + xpath.xmlXPathFreeContext(self._xpathCtxt) + if config.ENABLE_THREADING: + if self._eval_lock is not NULL: + python.PyThread_free_lock(self._eval_lock) + + cdef set_context(self, xpath.xmlXPathContext* xpathCtxt): + self._xpathCtxt = xpathCtxt + self._context.set_context(xpathCtxt) + + cdef bint _checkAbsolutePath(self, char* path) noexcept: + cdef char c + if path is NULL: + return 0 + c = path[0] + while c == c' ' or c == c'\t': + path = path + 1 + c = path[0] + return c == c'/' + + @cython.final + cdef int _lock(self) except -1: + cdef int result + if config.ENABLE_THREADING and self._eval_lock != NULL: + with nogil: + result = python.PyThread_acquire_lock( + self._eval_lock, python.WAIT_LOCK) + if result == 0: + raise XPathError, "XPath evaluator locking failed" + return 0 + + @cython.final + cdef void _unlock(self) noexcept: + if config.ENABLE_THREADING and self._eval_lock != NULL: + python.PyThread_release_lock(self._eval_lock) + + cdef _build_parse_error(self): + cdef _BaseErrorLog entries + entries = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS) + if entries: + message = entries._buildExceptionMessage(None) + if message is not None: + return XPathSyntaxError(message, self._error_log) + return XPathSyntaxError( + self._error_log._buildExceptionMessage("Error in xpath expression"), + self._error_log) + + cdef _build_eval_error(self): + cdef _BaseErrorLog entries + entries = self._error_log.filter_types(_XPATH_EVAL_ERRORS) + if not entries: + entries = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS) + if entries: + message = entries._buildExceptionMessage(None) + if message is not None: + return XPathEvalError(message, self._error_log) + return XPathEvalError( + self._error_log._buildExceptionMessage("Error in xpath expression"), + self._error_log) + + cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc): + if self._context._exc._has_raised(): + if xpathObj is not NULL: + _freeXPathObject(xpathObj) + xpathObj = NULL + self._context._release_temp_refs() + self._context._exc._raise_if_stored() + + if xpathObj is NULL: + self._context._release_temp_refs() + raise self._build_eval_error() + + try: + result = _unwrapXPathObject(xpathObj, doc, self._context) + finally: + _freeXPathObject(xpathObj) + self._context._release_temp_refs() + + return result + + +cdef class XPathElementEvaluator(_XPathEvaluatorBase): + """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True, smart_strings=True) + Create an XPath evaluator for an element. + + Absolute XPath expressions (starting with '/') will be evaluated against + the ElementTree as returned by getroottree(). + + Additional namespace declarations can be passed with the + 'namespace' keyword argument. EXSLT regular expression support + can be disabled with the 'regexp' boolean keyword (defaults to + True). Smart strings will be returned for string results unless + you pass ``smart_strings=False``. + """ + cdef _Element _element + def __init__(self, _Element element not None, *, namespaces=None, + extensions=None, regexp=True, smart_strings=True): + cdef xpath.xmlXPathContext* xpathCtxt + cdef int ns_register_status + cdef _Document doc + _assertValidNode(element) + _assertValidDoc(element._doc) + self._element = element + doc = element._doc + _XPathEvaluatorBase.__init__(self, namespaces, extensions, + regexp, smart_strings) + xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc) + if xpathCtxt is NULL: + raise MemoryError() + self.set_context(xpathCtxt) + + def register_namespace(self, prefix, uri): + """Register a namespace with the XPath context. + """ + assert self._xpathCtxt is not NULL, "XPath context not initialised" + self._context.addNamespace(prefix, uri) + + def register_namespaces(self, namespaces): + """Register a prefix -> uri dict. + """ + assert self._xpathCtxt is not NULL, "XPath context not initialised" + for prefix, uri in namespaces.items(): + self._context.addNamespace(prefix, uri) + + def __call__(self, _path, **_variables): + """__call__(self, _path, **_variables) + + Evaluate an XPath expression on the document. + + Variables may be provided as keyword arguments. Note that namespaces + are currently not supported for variables. + + Absolute XPath expressions (starting with '/') will be evaluated + against the ElementTree as returned by getroottree(). + """ + cdef xpath.xmlXPathObject* xpathObj + cdef _Document doc + assert self._xpathCtxt is not NULL, "XPath context not initialised" + path = _utf8(_path) + doc = self._element._doc + + self._lock() + self._xpathCtxt.node = self._element._c_node + try: + self._context.register_context(doc) + self._context.registerVariables(_variables) + c_path = _xcstr(path) + with nogil: + xpathObj = xpath.xmlXPathEvalExpression( + c_path, self._xpathCtxt) + result = self._handle_result(xpathObj, doc) + finally: + self._context.unregister_context() + self._unlock() + + return result + + +cdef class XPathDocumentEvaluator(XPathElementEvaluator): + """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True, smart_strings=True) + Create an XPath evaluator for an ElementTree. + + Additional namespace declarations can be passed with the + 'namespace' keyword argument. EXSLT regular expression support + can be disabled with the 'regexp' boolean keyword (defaults to + True). Smart strings will be returned for string results unless + you pass ``smart_strings=False``. + """ + def __init__(self, _ElementTree etree not None, *, namespaces=None, + extensions=None, regexp=True, smart_strings=True): + XPathElementEvaluator.__init__( + self, etree._context_node, namespaces=namespaces, + extensions=extensions, regexp=regexp, + smart_strings=smart_strings) + + def __call__(self, _path, **_variables): + """__call__(self, _path, **_variables) + + Evaluate an XPath expression on the document. + + Variables may be provided as keyword arguments. Note that namespaces + are currently not supported for variables. + """ + cdef xpath.xmlXPathObject* xpathObj + cdef xmlDoc* c_doc + cdef _Document doc + assert self._xpathCtxt is not NULL, "XPath context not initialised" + path = _utf8(_path) + doc = self._element._doc + + self._lock() + try: + self._context.register_context(doc) + c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node) + try: + self._context.registerVariables(_variables) + c_path = _xcstr(path) + with nogil: + self._xpathCtxt.doc = c_doc + self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc) + xpathObj = xpath.xmlXPathEvalExpression( + c_path, self._xpathCtxt) + result = self._handle_result(xpathObj, doc) + finally: + _destroyFakeDoc(doc._c_doc, c_doc) + self._context.unregister_context() + finally: + self._unlock() + + return result + + +def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None, + regexp=True, smart_strings=True): + """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True, smart_strings=True) + + Creates an XPath evaluator for an ElementTree or an Element. + + The resulting object can be called with an XPath expression as argument + and XPath variables provided as keyword arguments. + + Additional namespace declarations can be passed with the + 'namespace' keyword argument. EXSLT regular expression support + can be disabled with the 'regexp' boolean keyword (defaults to + True). Smart strings will be returned for string results unless + you pass ``smart_strings=False``. + """ + if isinstance(etree_or_element, _ElementTree): + return XPathDocumentEvaluator( + etree_or_element, namespaces=namespaces, + extensions=extensions, regexp=regexp, smart_strings=smart_strings) + else: + return XPathElementEvaluator( + etree_or_element, namespaces=namespaces, + extensions=extensions, regexp=regexp, smart_strings=smart_strings) + + +cdef class XPath(_XPathEvaluatorBase): + """XPath(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True) + A compiled XPath expression that can be called on Elements and ElementTrees. + + Besides the XPath expression, you can pass prefix-namespace + mappings and extension functions to the constructor through the + keyword arguments ``namespaces`` and ``extensions``. EXSLT + regular expression support can be disabled with the 'regexp' + boolean keyword (defaults to True). Smart strings will be + returned for string results unless you pass + ``smart_strings=False``. + """ + cdef xpath.xmlXPathCompExpr* _xpath + cdef bytes _path + def __cinit__(self): + self._xpath = NULL + + def __init__(self, path, *, namespaces=None, extensions=None, + regexp=True, smart_strings=True): + cdef xpath.xmlXPathContext* xpathCtxt + _XPathEvaluatorBase.__init__(self, namespaces, extensions, + regexp, smart_strings) + self._path = _utf8(path) + xpathCtxt = xpath.xmlXPathNewContext(NULL) + if xpathCtxt is NULL: + raise MemoryError() + self.set_context(xpathCtxt) + self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _xcstr(self._path)) + if self._xpath is NULL: + raise self._build_parse_error() + + def __call__(self, _etree_or_element, **_variables): + "__call__(self, _etree_or_element, **_variables)" + cdef xpath.xmlXPathObject* xpathObj + cdef _Document document + cdef _Element element + + assert self._xpathCtxt is not NULL, "XPath context not initialised" + document = _documentOrRaise(_etree_or_element) + element = _rootNodeOrRaise(_etree_or_element) + + self._lock() + self._xpathCtxt.doc = document._c_doc + self._xpathCtxt.node = element._c_node + + try: + self._context.register_context(document) + self._context.registerVariables(_variables) + with nogil: + xpathObj = xpath.xmlXPathCompiledEval( + self._xpath, self._xpathCtxt) + result = self._handle_result(xpathObj, document) + finally: + self._context.unregister_context() + self._unlock() + return result + + @property + def path(self): + """The literal XPath expression. + """ + return self._path.decode('UTF-8') + + def __dealloc__(self): + if self._xpath is not NULL: + xpath.xmlXPathFreeCompExpr(self._xpath) + + def __repr__(self): + return self.path + + +cdef object _replace_strings = re.compile(b'("[^"]*")|(\'[^\']*\')').sub +cdef object _find_namespaces = re.compile(b'({[^}]+})').findall + +cdef class ETXPath(XPath): + """ETXPath(self, path, extensions=None, regexp=True, smart_strings=True) + Special XPath class that supports the ElementTree {uri} notation for namespaces. + + Note that this class does not accept the ``namespace`` keyword + argument. All namespaces must be passed as part of the path + string. Smart strings will be returned for string results unless + you pass ``smart_strings=False``. + """ + def __init__(self, path, *, extensions=None, regexp=True, + smart_strings=True): + path, namespaces = self._nsextract_path(path) + XPath.__init__(self, path, namespaces=namespaces, + extensions=extensions, regexp=regexp, + smart_strings=smart_strings) + + cdef _nsextract_path(self, path): + # replace {namespaces} by new prefixes + cdef dict namespaces = {} + cdef list namespace_defs = [] + cdef int i + path_utf = _utf8(path) + stripped_path = _replace_strings(b'', path_utf) # remove string literals + i = 1 + for namespace_def in _find_namespaces(stripped_path): + if namespace_def not in namespace_defs: + prefix = python.PyBytes_FromFormat("__xpp%02d", i) + i += 1 + namespace_defs.append(namespace_def) + namespace = namespace_def[1:-1] # remove '{}' + namespace = (<bytes>namespace).decode('utf8') + namespaces[prefix.decode('utf8')] = namespace + prefix_str = prefix + b':' + # FIXME: this also replaces {namespaces} within strings! + path_utf = path_utf.replace(namespace_def, prefix_str) + path = path_utf.decode('utf8') + return path, namespaces diff --git a/.venv/lib/python3.12/site-packages/lxml/xslt.pxi b/.venv/lib/python3.12/site-packages/lxml/xslt.pxi new file mode 100644 index 00000000..f7a7be29 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/xslt.pxi @@ -0,0 +1,950 @@ +# XSLT +from lxml.includes cimport xslt + + +cdef class XSLTError(LxmlError): + """Base class of all XSLT errors. + """ + +cdef class XSLTParseError(XSLTError): + """Error parsing a stylesheet document. + """ + +cdef class XSLTApplyError(XSLTError): + """Error running an XSL transformation. + """ + +class XSLTSaveError(XSLTError, SerialisationError): + """Error serialising an XSLT result. + """ + +cdef class XSLTExtensionError(XSLTError): + """Error registering an XSLT extension. + """ + + +# version information +LIBXSLT_COMPILED_VERSION = __unpackIntVersion(xslt.LIBXSLT_VERSION) +LIBXSLT_VERSION = __unpackIntVersion(xslt.xsltLibxsltVersion) + + +################################################################################ +# Where do we store what? +# +# xsltStylesheet->doc->_private +# == _XSLTResolverContext for XSL stylesheet +# +# xsltTransformContext->_private +# == _XSLTResolverContext for transformed document +# +################################################################################ + + +################################################################################ +# XSLT document loaders + +@cython.final +@cython.internal +cdef class _XSLTResolverContext(_ResolverContext): + cdef xmlDoc* _c_style_doc + cdef _BaseParser _parser + + cdef _XSLTResolverContext _copy(self): + cdef _XSLTResolverContext context + context = _XSLTResolverContext() + _initXSLTResolverContext(context, self._parser) + context._c_style_doc = self._c_style_doc + return context + +cdef _initXSLTResolverContext(_XSLTResolverContext context, + _BaseParser parser): + _initResolverContext(context, parser.resolvers) + context._parser = parser + context._c_style_doc = NULL + +cdef xmlDoc* _xslt_resolve_from_python(const_xmlChar* c_uri, void* c_context, + int parse_options, int* error) with gil: + # call the Python document loaders + cdef _XSLTResolverContext context + cdef _ResolverRegistry resolvers + cdef _InputDocument doc_ref + cdef xmlDoc* c_doc + cdef xmlDoc* c_return_doc = NULL + + error[0] = 0 + context = <_XSLTResolverContext>c_context + + # shortcut if we resolve the stylesheet itself + c_doc = context._c_style_doc + try: + if c_doc is not NULL and c_doc.URL is not NULL: + if tree.xmlStrcmp(c_uri, c_doc.URL) == 0: + c_return_doc = _copyDoc(c_doc, 1) + return c_return_doc # 'goto', see 'finally' below + + # delegate to the Python resolvers + resolvers = context._resolvers + if tree.xmlStrncmp(<unsigned char*>'string://__STRING__XSLT__/', c_uri, 26) == 0: + c_uri += 26 + uri = _decodeFilename(c_uri) + doc_ref = resolvers.resolve(uri, None, context) + + if doc_ref is not None: + if doc_ref._type == PARSER_DATA_STRING: + c_return_doc = _parseDoc( + doc_ref._data_bytes, doc_ref._filename, context._parser) + elif doc_ref._type == PARSER_DATA_FILENAME: + c_return_doc = _parseDocFromFile( + doc_ref._filename, context._parser) + elif doc_ref._type == PARSER_DATA_FILE: + c_return_doc = _parseDocFromFilelike( + doc_ref._file, doc_ref._filename, context._parser) + elif doc_ref._type == PARSER_DATA_EMPTY: + c_return_doc = _newXMLDoc() + if c_return_doc is not NULL and c_return_doc.URL is NULL: + c_return_doc.URL = tree.xmlStrdup(c_uri) + except: + error[0] = 1 + context._store_raised() + finally: + return c_return_doc # and swallow any further exceptions + + +cdef void _xslt_store_resolver_exception(const_xmlChar* c_uri, void* context, + xslt.xsltLoadType c_type) noexcept with gil: + try: + message = f"Cannot resolve URI {_decodeFilename(c_uri)}" + if c_type == xslt.XSLT_LOAD_DOCUMENT: + exception = XSLTApplyError(message) + else: + exception = XSLTParseError(message) + (<_XSLTResolverContext>context)._store_exception(exception) + except BaseException as e: + (<_XSLTResolverContext>context)._store_exception(e) + finally: + return # and swallow any further exceptions + + +cdef xmlDoc* _xslt_doc_loader(const_xmlChar* c_uri, tree.xmlDict* c_dict, + int parse_options, void* c_ctxt, + xslt.xsltLoadType c_type) noexcept nogil: + # nogil => no Python objects here, may be called without thread context ! + cdef xmlDoc* c_doc + cdef xmlDoc* result + cdef void* c_pcontext + cdef int error = 0 + # find resolver contexts of stylesheet and transformed doc + if c_type == xslt.XSLT_LOAD_DOCUMENT: + # transformation time + c_pcontext = (<xslt.xsltTransformContext*>c_ctxt)._private + elif c_type == xslt.XSLT_LOAD_STYLESHEET: + # include/import resolution while parsing + c_pcontext = (<xslt.xsltStylesheet*>c_ctxt).doc._private + else: + c_pcontext = NULL + + if c_pcontext is NULL: + # can't call Python without context, fall back to default loader + return XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, parse_options, c_ctxt, c_type) + + c_doc = _xslt_resolve_from_python(c_uri, c_pcontext, parse_options, &error) + if c_doc is NULL and not error: + c_doc = XSLT_DOC_DEFAULT_LOADER( + c_uri, c_dict, parse_options, c_ctxt, c_type) + if c_doc is NULL: + _xslt_store_resolver_exception(c_uri, c_pcontext, c_type) + + if c_doc is not NULL and c_type == xslt.XSLT_LOAD_STYLESHEET: + c_doc._private = c_pcontext + return c_doc + +cdef xslt.xsltDocLoaderFunc XSLT_DOC_DEFAULT_LOADER = xslt.xsltDocDefaultLoader +xslt.xsltSetLoaderFunc(<xslt.xsltDocLoaderFunc>_xslt_doc_loader) + +################################################################################ +# XSLT file/network access control + +cdef class XSLTAccessControl: + """XSLTAccessControl(self, read_file=True, write_file=True, create_dir=True, read_network=True, write_network=True) + + Access control for XSLT: reading/writing files, directories and + network I/O. Access to a type of resource is granted or denied by + passing any of the following boolean keyword arguments. All of + them default to True to allow access. + + - read_file + - write_file + - create_dir + - read_network + - write_network + + For convenience, there is also a class member `DENY_ALL` that + provides an XSLTAccessControl instance that is readily configured + to deny everything, and a `DENY_WRITE` member that denies all + write access but allows read access. + + See `XSLT`. + """ + cdef xslt.xsltSecurityPrefs* _prefs + def __cinit__(self): + self._prefs = xslt.xsltNewSecurityPrefs() + if self._prefs is NULL: + raise MemoryError() + + def __init__(self, *, bint read_file=True, bint write_file=True, bint create_dir=True, + bint read_network=True, bint write_network=True): + self._setAccess(xslt.XSLT_SECPREF_READ_FILE, read_file) + self._setAccess(xslt.XSLT_SECPREF_WRITE_FILE, write_file) + self._setAccess(xslt.XSLT_SECPREF_CREATE_DIRECTORY, create_dir) + self._setAccess(xslt.XSLT_SECPREF_READ_NETWORK, read_network) + self._setAccess(xslt.XSLT_SECPREF_WRITE_NETWORK, write_network) + + DENY_ALL = XSLTAccessControl( + read_file=False, write_file=False, create_dir=False, + read_network=False, write_network=False) + + DENY_WRITE = XSLTAccessControl( + read_file=True, write_file=False, create_dir=False, + read_network=True, write_network=False) + + def __dealloc__(self): + if self._prefs is not NULL: + xslt.xsltFreeSecurityPrefs(self._prefs) + + @cython.final + cdef _setAccess(self, xslt.xsltSecurityOption option, bint allow): + cdef xslt.xsltSecurityCheck function + if allow: + function = xslt.xsltSecurityAllow + else: + function = xslt.xsltSecurityForbid + xslt.xsltSetSecurityPrefs(self._prefs, option, function) + + @cython.final + cdef void _register_in_context(self, xslt.xsltTransformContext* ctxt) noexcept: + xslt.xsltSetCtxtSecurityPrefs(self._prefs, ctxt) + + @property + def options(self): + """The access control configuration as a map of options.""" + return { + 'read_file': self._optval(xslt.XSLT_SECPREF_READ_FILE), + 'write_file': self._optval(xslt.XSLT_SECPREF_WRITE_FILE), + 'create_dir': self._optval(xslt.XSLT_SECPREF_CREATE_DIRECTORY), + 'read_network': self._optval(xslt.XSLT_SECPREF_READ_NETWORK), + 'write_network': self._optval(xslt.XSLT_SECPREF_WRITE_NETWORK), + } + + @cython.final + cdef _optval(self, xslt.xsltSecurityOption option): + cdef xslt.xsltSecurityCheck function + function = xslt.xsltGetSecurityPrefs(self._prefs, option) + if function is <xslt.xsltSecurityCheck>xslt.xsltSecurityAllow: + return True + elif function is <xslt.xsltSecurityCheck>xslt.xsltSecurityForbid: + return False + else: + return None + + def __repr__(self): + items = sorted(self.options.items()) + return "%s(%s)" % ( + python._fqtypename(self).decode('UTF-8').split('.')[-1], + ', '.join(["%s=%r" % item for item in items])) + +################################################################################ +# XSLT + +cdef int _register_xslt_function(void* ctxt, name_utf, ns_utf) noexcept: + if ns_utf is None: + return 0 + # libxml2 internalises the strings if ctxt has a dict + return xslt.xsltRegisterExtFunction( + <xslt.xsltTransformContext*>ctxt, _xcstr(name_utf), _xcstr(ns_utf), + <xslt.xmlXPathFunction>_xpath_function_call) + +cdef dict EMPTY_DICT = {} + +@cython.final +@cython.internal +cdef class _XSLTContext(_BaseContext): + cdef xslt.xsltTransformContext* _xsltCtxt + cdef _ReadOnlyElementProxy _extension_element_proxy + cdef dict _extension_elements + def __cinit__(self): + self._xsltCtxt = NULL + self._extension_elements = EMPTY_DICT + + def __init__(self, namespaces, extensions, error_log, enable_regexp, + build_smart_strings): + if extensions is not None and extensions: + for ns_name_tuple, extension in extensions.items(): + if ns_name_tuple[0] is None: + raise XSLTExtensionError, \ + "extensions must not have empty namespaces" + if isinstance(extension, XSLTExtension): + if self._extension_elements is EMPTY_DICT: + self._extension_elements = {} + extensions = extensions.copy() + ns_utf = _utf8(ns_name_tuple[0]) + name_utf = _utf8(ns_name_tuple[1]) + self._extension_elements[(ns_utf, name_utf)] = extension + del extensions[ns_name_tuple] + _BaseContext.__init__(self, namespaces, extensions, error_log, enable_regexp, + build_smart_strings) + + cdef _BaseContext _copy(self): + cdef _XSLTContext context + context = <_XSLTContext>_BaseContext._copy(self) + context._extension_elements = self._extension_elements + return context + + cdef register_context(self, xslt.xsltTransformContext* xsltCtxt, + _Document doc): + self._xsltCtxt = xsltCtxt + self._set_xpath_context(xsltCtxt.xpathCtxt) + self._register_context(doc) + self.registerLocalFunctions(xsltCtxt, _register_xslt_function) + self.registerGlobalFunctions(xsltCtxt, _register_xslt_function) + _registerXSLTExtensions(xsltCtxt, self._extension_elements) + + cdef free_context(self): + self._cleanup_context() + self._release_context() + if self._xsltCtxt is not NULL: + xslt.xsltFreeTransformContext(self._xsltCtxt) + self._xsltCtxt = NULL + self._release_temp_refs() + + +@cython.final +@cython.internal +@cython.freelist(8) +cdef class _XSLTQuotedStringParam: + """A wrapper class for literal XSLT string parameters that require + quote escaping. + """ + cdef bytes strval + def __cinit__(self, strval): + self.strval = _utf8(strval) + + +@cython.no_gc_clear +cdef class XSLT: + """XSLT(self, xslt_input, extensions=None, regexp=True, access_control=None) + + Turn an XSL document into an XSLT object. + + Calling this object on a tree or Element will execute the XSLT:: + + transform = etree.XSLT(xsl_tree) + result = transform(xml_tree) + + Keyword arguments of the constructor: + + - extensions: a dict mapping ``(namespace, name)`` pairs to + extension functions or extension elements + - regexp: enable exslt regular expression support in XPath + (default: True) + - access_control: access restrictions for network or file + system (see `XSLTAccessControl`) + + Keyword arguments of the XSLT call: + + - profile_run: enable XSLT profiling and make the profile available + as XML document in ``result.xslt_profile`` (default: False) + + Other keyword arguments of the call are passed to the stylesheet + as parameters. + """ + cdef _XSLTContext _context + cdef xslt.xsltStylesheet* _c_style + cdef _XSLTResolverContext _xslt_resolver_context + cdef XSLTAccessControl _access_control + cdef _ErrorLog _error_log + + def __cinit__(self): + self._c_style = NULL + + def __init__(self, xslt_input, *, extensions=None, regexp=True, + access_control=None): + cdef xslt.xsltStylesheet* c_style = NULL + cdef xmlDoc* c_doc + cdef _Document doc + cdef _Element root_node + + doc = _documentOrRaise(xslt_input) + root_node = _rootNodeOrRaise(xslt_input) + + # set access control or raise TypeError + self._access_control = access_control + + # make a copy of the document as stylesheet parsing modifies it + c_doc = _copyDocRoot(doc._c_doc, root_node._c_node) + + # make sure we always have a stylesheet URL + if c_doc.URL is NULL: + doc_url_utf = python.PyUnicode_AsASCIIString( + f"string://__STRING__XSLT__/{id(self)}.xslt") + c_doc.URL = tree.xmlStrdup(_xcstr(doc_url_utf)) + + self._error_log = _ErrorLog() + self._xslt_resolver_context = _XSLTResolverContext() + _initXSLTResolverContext(self._xslt_resolver_context, doc._parser) + # keep a copy in case we need to access the stylesheet via 'document()' + self._xslt_resolver_context._c_style_doc = _copyDoc(c_doc, 1) + c_doc._private = <python.PyObject*>self._xslt_resolver_context + + with self._error_log: + orig_loader = _register_document_loader() + c_style = xslt.xsltParseStylesheetDoc(c_doc) + _reset_document_loader(orig_loader) + + if c_style is NULL or c_style.errors: + tree.xmlFreeDoc(c_doc) + if c_style is not NULL: + xslt.xsltFreeStylesheet(c_style) + self._xslt_resolver_context._raise_if_stored() + # last error seems to be the most accurate here + if self._error_log.last_error is not None and \ + self._error_log.last_error.message: + raise XSLTParseError(self._error_log.last_error.message, + self._error_log) + else: + raise XSLTParseError( + self._error_log._buildExceptionMessage( + "Cannot parse stylesheet"), + self._error_log) + + c_doc._private = NULL # no longer used! + self._c_style = c_style + self._context = _XSLTContext(None, extensions, self._error_log, regexp, True) + + def __dealloc__(self): + if self._xslt_resolver_context is not None and \ + self._xslt_resolver_context._c_style_doc is not NULL: + tree.xmlFreeDoc(self._xslt_resolver_context._c_style_doc) + # this cleans up the doc copy as well + if self._c_style is not NULL: + xslt.xsltFreeStylesheet(self._c_style) + + @property + def error_log(self): + """The log of errors and warnings of an XSLT execution.""" + return self._error_log.copy() + + @staticmethod + def strparam(strval): + """strparam(strval) + + Mark an XSLT string parameter that requires quote escaping + before passing it into the transformation. Use it like this:: + + result = transform(doc, some_strval = XSLT.strparam( + '''it's \"Monty Python's\" ...''')) + + Escaped string parameters can be reused without restriction. + """ + return _XSLTQuotedStringParam(strval) + + @staticmethod + def set_global_max_depth(int max_depth): + """set_global_max_depth(max_depth) + + The maximum traversal depth that the stylesheet engine will allow. + This does not only count the template recursion depth but also takes + the number of variables/parameters into account. The required setting + for a run depends on both the stylesheet and the input data. + + Example:: + + XSLT.set_global_max_depth(5000) + + Note that this is currently a global, module-wide setting because + libxslt does not support it at a per-stylesheet level. + """ + if max_depth < 0: + raise ValueError("cannot set a maximum stylesheet traversal depth < 0") + xslt.xsltMaxDepth = max_depth + + def tostring(self, _ElementTree result_tree): + """tostring(self, result_tree) + + Save result doc to string based on stylesheet output method. + + :deprecated: use str(result_tree) instead. + """ + return str(result_tree) + + def __deepcopy__(self, memo): + return self.__copy__() + + def __copy__(self): + return _copyXSLT(self) + + def __call__(self, _input, *, profile_run=False, **kw): + """__call__(self, _input, profile_run=False, **kw) + + Execute the XSL transformation on a tree or Element. + + Pass the ``profile_run`` option to get profile information + about the XSLT. The result of the XSLT will have a property + xslt_profile that holds an XML tree with profiling data. + """ + cdef _XSLTContext context = None + cdef _XSLTResolverContext resolver_context + cdef _Document input_doc + cdef _Element root_node + cdef _Document result_doc + cdef _Document profile_doc = None + cdef xmlDoc* c_profile_doc + cdef xslt.xsltTransformContext* transform_ctxt + cdef xmlDoc* c_result = NULL + cdef xmlDoc* c_doc + cdef tree.xmlDict* c_dict + cdef const_char** params = NULL + + assert self._c_style is not NULL, "XSLT stylesheet not initialised" + input_doc = _documentOrRaise(_input) + root_node = _rootNodeOrRaise(_input) + + c_doc = _fakeRootDoc(input_doc._c_doc, root_node._c_node) + + transform_ctxt = xslt.xsltNewTransformContext(self._c_style, c_doc) + if transform_ctxt is NULL: + _destroyFakeDoc(input_doc._c_doc, c_doc) + raise MemoryError() + + # using the stylesheet dict is safer than using a possibly + # unrelated dict from the current thread. Almost all + # non-input tag/attr names will come from the stylesheet + # anyway. + if transform_ctxt.dict is not NULL: + xmlparser.xmlDictFree(transform_ctxt.dict) + if kw: + # parameter values are stored in the dict + # => avoid unnecessarily cluttering the global dict + transform_ctxt.dict = xmlparser.xmlDictCreateSub(self._c_style.doc.dict) + if transform_ctxt.dict is NULL: + xslt.xsltFreeTransformContext(transform_ctxt) + raise MemoryError() + else: + transform_ctxt.dict = self._c_style.doc.dict + xmlparser.xmlDictReference(transform_ctxt.dict) + + xslt.xsltSetCtxtParseOptions( + transform_ctxt, input_doc._parser._parse_options) + + if profile_run: + transform_ctxt.profile = 1 + + try: + context = self._context._copy() + context.register_context(transform_ctxt, input_doc) + + resolver_context = self._xslt_resolver_context._copy() + transform_ctxt._private = <python.PyObject*>resolver_context + + _convert_xslt_parameters(transform_ctxt, kw, ¶ms) + c_result = self._run_transform( + c_doc, params, context, transform_ctxt) + if params is not NULL: + # deallocate space for parameters + python.lxml_free(params) + + if transform_ctxt.state != xslt.XSLT_STATE_OK: + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + c_result = NULL + + if transform_ctxt.profile: + c_profile_doc = xslt.xsltGetProfileInformation(transform_ctxt) + if c_profile_doc is not NULL: + profile_doc = _documentFactory( + c_profile_doc, input_doc._parser) + finally: + if context is not None: + context.free_context() + _destroyFakeDoc(input_doc._c_doc, c_doc) + + try: + if resolver_context is not None and resolver_context._has_raised(): + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + c_result = NULL + resolver_context._raise_if_stored() + + if context._exc._has_raised(): + if c_result is not NULL: + tree.xmlFreeDoc(c_result) + c_result = NULL + context._exc._raise_if_stored() + + if c_result is NULL: + # last error seems to be the most accurate here + error = self._error_log.last_error + if error is not None and error.message: + if error.line > 0: + message = f"{error.message}, line {error.line}" + else: + message = error.message + elif error is not None and error.line > 0: + message = f"Error applying stylesheet, line {error.line}" + else: + message = "Error applying stylesheet" + raise XSLTApplyError(message, self._error_log) + finally: + if resolver_context is not None: + resolver_context.clear() + + result_doc = _documentFactory(c_result, input_doc._parser) + + c_dict = c_result.dict + xmlparser.xmlDictReference(c_dict) + __GLOBAL_PARSER_CONTEXT.initThreadDictRef(&c_result.dict) + if c_dict is not c_result.dict or \ + self._c_style.doc.dict is not c_result.dict or \ + input_doc._c_doc.dict is not c_result.dict: + with nogil: + if c_dict is not c_result.dict: + fixThreadDictNames(<xmlNode*>c_result, + c_dict, c_result.dict) + if self._c_style.doc.dict is not c_result.dict: + fixThreadDictNames(<xmlNode*>c_result, + self._c_style.doc.dict, c_result.dict) + if input_doc._c_doc.dict is not c_result.dict: + fixThreadDictNames(<xmlNode*>c_result, + input_doc._c_doc.dict, c_result.dict) + xmlparser.xmlDictFree(c_dict) + + return _xsltResultTreeFactory(result_doc, self, profile_doc) + + cdef xmlDoc* _run_transform(self, xmlDoc* c_input_doc, + const_char** params, _XSLTContext context, + xslt.xsltTransformContext* transform_ctxt): + cdef xmlDoc* c_result + xslt.xsltSetTransformErrorFunc(transform_ctxt, <void*>self._error_log, + <xmlerror.xmlGenericErrorFunc>_receiveXSLTError) + if self._access_control is not None: + self._access_control._register_in_context(transform_ctxt) + with self._error_log, nogil: + orig_loader = _register_document_loader() + c_result = xslt.xsltApplyStylesheetUser( + self._c_style, c_input_doc, params, NULL, NULL, transform_ctxt) + _reset_document_loader(orig_loader) + return c_result + + +cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt, + dict parameters, const_char*** params_ptr): + cdef Py_ssize_t i, parameter_count + cdef const_char** params + cdef tree.xmlDict* c_dict = transform_ctxt.dict + params_ptr[0] = NULL + parameter_count = len(parameters) + if parameter_count == 0: + return + # allocate space for parameters + # * 2 as we want an entry for both key and value, + # and + 1 as array is NULL terminated + params = <const_char**>python.lxml_malloc(parameter_count * 2 + 1, sizeof(const_char*)) + if not params: + raise MemoryError() + try: + i = 0 + for key, value in parameters.iteritems(): + k = _utf8(key) + if isinstance(value, _XSLTQuotedStringParam): + v = (<_XSLTQuotedStringParam>value).strval + xslt.xsltQuoteOneUserParam( + transform_ctxt, _xcstr(k), _xcstr(v)) + else: + if isinstance(value, XPath): + v = (<XPath>value)._path + else: + v = _utf8(value) + params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(k), len(k)) + i += 1 + params[i] = <const_char*>tree.xmlDictLookup(c_dict, _xcstr(v), len(v)) + i += 1 + except: + python.lxml_free(params) + raise + params[i] = NULL + params_ptr[0] = params + +cdef XSLT _copyXSLT(XSLT stylesheet): + cdef XSLT new_xslt + cdef xmlDoc* c_doc + assert stylesheet._c_style is not NULL, "XSLT stylesheet not initialised" + new_xslt = XSLT.__new__(XSLT) + new_xslt._access_control = stylesheet._access_control + new_xslt._error_log = _ErrorLog() + new_xslt._context = stylesheet._context._copy() + + new_xslt._xslt_resolver_context = stylesheet._xslt_resolver_context._copy() + new_xslt._xslt_resolver_context._c_style_doc = _copyDoc( + stylesheet._xslt_resolver_context._c_style_doc, 1) + + c_doc = _copyDoc(stylesheet._c_style.doc, 1) + new_xslt._c_style = xslt.xsltParseStylesheetDoc(c_doc) + if new_xslt._c_style is NULL: + tree.xmlFreeDoc(c_doc) + raise MemoryError() + + return new_xslt + +@cython.final +cdef class _XSLTResultTree(_ElementTree): + """The result of an XSLT evaluation. + + Use ``str()`` or ``bytes()`` (or ``unicode()`` in Python 2.x) to serialise to a string, + and the ``.write_output()`` method to write serialise to a file. + """ + cdef XSLT _xslt + cdef _Document _profile + cdef xmlChar* _buffer + cdef Py_ssize_t _buffer_len + cdef Py_ssize_t _buffer_refcnt + + def write_output(self, file, *, compression=0): + """write_output(self, file, *, compression=0) + + Serialise the XSLT output to a file or file-like object. + + As opposed to the generic ``.write()`` method, ``.write_output()`` serialises + the result as defined by the ``<xsl:output>`` tag. + """ + cdef _FilelikeWriter writer = None + cdef _Document doc + cdef int r, rclose, c_compression + cdef const_xmlChar* c_encoding = NULL + cdef tree.xmlOutputBuffer* c_buffer + + if self._context_node is not None: + doc = self._context_node._doc + else: + doc = None + if doc is None: + doc = self._doc + if doc is None: + raise XSLTSaveError("No document to serialise") + c_compression = compression or 0 + xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style) + writer = _create_output_buffer(file, <const_char*>c_encoding, compression, &c_buffer, close=False) + if writer is None: + with nogil: + r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style) + rclose = tree.xmlOutputBufferClose(c_buffer) + else: + r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style) + rclose = tree.xmlOutputBufferClose(c_buffer) + if writer is not None: + writer._exc_context._raise_if_stored() + if r < 0 or rclose == -1: + python.PyErr_SetFromErrno(IOError) # raises IOError + + cdef _saveToStringAndSize(self, xmlChar** s, int* l): + cdef _Document doc + cdef int r + if self._context_node is not None: + doc = self._context_node._doc + else: + doc = None + if doc is None: + doc = self._doc + if doc is None: + s[0] = NULL + return + with nogil: + r = xslt.xsltSaveResultToString(s, l, doc._c_doc, + self._xslt._c_style) + if r == -1: + raise MemoryError() + + def __str__(self): + cdef xmlChar* encoding + cdef xmlChar* s = NULL + cdef int l = 0 + self._saveToStringAndSize(&s, &l) + if s is NULL: + return '' + encoding = self._xslt._c_style.encoding + try: + if encoding is NULL: + result = s[:l].decode('UTF-8') + else: + result = s[:l].decode(encoding) + finally: + tree.xmlFree(s) + return _stripEncodingDeclaration(result) + + def __getbuffer__(self, Py_buffer* buffer, int flags): + cdef int l = 0 + if buffer is NULL: + return + if self._buffer is NULL or flags & python.PyBUF_WRITABLE: + self._saveToStringAndSize(<xmlChar**>&buffer.buf, &l) + buffer.len = l + if self._buffer is NULL and not flags & python.PyBUF_WRITABLE: + self._buffer = <xmlChar*>buffer.buf + self._buffer_len = l + self._buffer_refcnt = 1 + else: + buffer.buf = self._buffer + buffer.len = self._buffer_len + self._buffer_refcnt += 1 + if flags & python.PyBUF_WRITABLE: + buffer.readonly = 0 + else: + buffer.readonly = 1 + if flags & python.PyBUF_FORMAT: + buffer.format = "B" + else: + buffer.format = NULL + buffer.ndim = 0 + buffer.shape = NULL + buffer.strides = NULL + buffer.suboffsets = NULL + buffer.itemsize = 1 + buffer.internal = NULL + if buffer.obj is not self: # set by Cython? + buffer.obj = self + + def __releasebuffer__(self, Py_buffer* buffer): + if buffer is NULL: + return + if <xmlChar*>buffer.buf is self._buffer: + self._buffer_refcnt -= 1 + if self._buffer_refcnt == 0: + tree.xmlFree(<char*>self._buffer) + self._buffer = NULL + else: + tree.xmlFree(<char*>buffer.buf) + buffer.buf = NULL + + property xslt_profile: + """Return an ElementTree with profiling data for the stylesheet run. + """ + def __get__(self): + cdef object root + if self._profile is None: + return None + root = self._profile.getroot() + if root is None: + return None + return ElementTree(root) + + def __del__(self): + self._profile = None + +cdef _xsltResultTreeFactory(_Document doc, XSLT xslt, _Document profile): + cdef _XSLTResultTree result + result = <_XSLTResultTree>_newElementTree(doc, None, _XSLTResultTree) + result._xslt = xslt + result._profile = profile + return result + +# functions like "output" and "write" are a potential security risk, but we +# rely on the user to configure XSLTAccessControl as needed +xslt.xsltRegisterAllExtras() + +# enable EXSLT support for XSLT +xslt.exsltRegisterAll() + + +################################################################################ +# XSLT PI support + +cdef object _RE_PI_HREF = re.compile(r'\s+href\s*=\s*(?:\'([^\']*)\'|"([^"]*)")') +cdef object _FIND_PI_HREF = _RE_PI_HREF.findall +cdef object _REPLACE_PI_HREF = _RE_PI_HREF.sub +cdef XPath __findStylesheetByID = None + +cdef _findStylesheetByID(_Document doc, id): + global __findStylesheetByID + if __findStylesheetByID is None: + __findStylesheetByID = XPath( + "//xsl:stylesheet[@xml:id = $id]", + namespaces={"xsl" : "http://www.w3.org/1999/XSL/Transform"}) + return __findStylesheetByID(doc, id=id) + +cdef class _XSLTProcessingInstruction(PIBase): + def parseXSL(self, parser=None): + """parseXSL(self, parser=None) + + Try to parse the stylesheet referenced by this PI and return + an ElementTree for it. If the stylesheet is embedded in the + same document (referenced via xml:id), find and return an + ElementTree for the stylesheet Element. + + The optional ``parser`` keyword argument can be passed to specify the + parser used to read from external stylesheet URLs. + """ + cdef _Document result_doc + cdef _Element result_node + cdef bytes href_utf + cdef const_xmlChar* c_href + cdef xmlAttr* c_attr + _assertValidNode(self) + if self._c_node.content is NULL: + raise ValueError, "PI lacks content" + hrefs = _FIND_PI_HREF(' ' + (<unsigned char*>self._c_node.content).decode('UTF-8')) + if len(hrefs) != 1: + raise ValueError, "malformed PI attributes" + hrefs = hrefs[0] + href_utf = utf8(hrefs[0] or hrefs[1]) + c_href = _xcstr(href_utf) + + if c_href[0] != c'#': + # normal URL, try to parse from it + c_href = tree.xmlBuildURI( + c_href, + tree.xmlNodeGetBase(self._c_node.doc, self._c_node)) + if c_href is not NULL: + try: + href_utf = <unsigned char*>c_href + finally: + tree.xmlFree(<char*>c_href) + result_doc = _parseDocumentFromURL(href_utf, parser) + return _elementTreeFactory(result_doc, None) + + # ID reference to embedded stylesheet + # try XML:ID lookup + _assertValidDoc(self._doc) + c_href += 1 # skip leading '#' + c_attr = tree.xmlGetID(self._c_node.doc, c_href) + if c_attr is not NULL and c_attr.doc is self._c_node.doc: + result_node = _elementFactory(self._doc, c_attr.parent) + return _elementTreeFactory(result_node._doc, result_node) + + # try XPath search + root = _findStylesheetByID(self._doc, funicode(c_href)) + if not root: + raise ValueError, "reference to non-existing embedded stylesheet" + elif len(root) > 1: + raise ValueError, "ambiguous reference to embedded stylesheet" + result_node = root[0] + return _elementTreeFactory(result_node._doc, result_node) + + def set(self, key, value): + """set(self, key, value) + + Supports setting the 'href' pseudo-attribute in the text of + the processing instruction. + """ + if key != "href": + raise AttributeError, \ + "only setting the 'href' attribute is supported on XSLT-PIs" + if value is None: + attrib = "" + elif '"' in value or '>' in value: + raise ValueError, "Invalid URL, must not contain '\"' or '>'" + else: + attrib = f' href="{value}"' + text = ' ' + self.text + if _FIND_PI_HREF(text): + self.text = _REPLACE_PI_HREF(attrib, text) + else: + self.text = text + attrib diff --git a/.venv/lib/python3.12/site-packages/lxml/xsltext.pxi b/.venv/lib/python3.12/site-packages/lxml/xsltext.pxi new file mode 100644 index 00000000..21894b9e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/xsltext.pxi @@ -0,0 +1,242 @@ +# XSLT extension elements + +cdef class XSLTExtension: + """Base class of an XSLT extension element. + """ + def execute(self, context, self_node, input_node, output_parent): + """execute(self, context, self_node, input_node, output_parent) + Execute this extension element. + + Subclasses must override this method. They may append + elements to the `output_parent` element here, or set its text + content. To this end, the `input_node` provides read-only + access to the current node in the input document, and the + `self_node` points to the extension element in the stylesheet. + + Note that the `output_parent` parameter may be `None` if there + is no parent element in the current context (e.g. no content + was added to the output tree yet). + """ + pass + + def apply_templates(self, _XSLTContext context not None, node, output_parent=None, + *, elements_only=False, remove_blank_text=False): + """apply_templates(self, context, node, output_parent=None, elements_only=False, remove_blank_text=False) + + Call this method to retrieve the result of applying templates + to an element. + + The return value is a list of elements or text strings that + were generated by the XSLT processor. If you pass + ``elements_only=True``, strings will be discarded from the result + list. The option ``remove_blank_text=True`` will only discard + strings that consist entirely of whitespace (e.g. formatting). + These options do not apply to Elements, only to bare string results. + + If you pass an Element as `output_parent` parameter, the result + will instead be appended to the element (including attributes + etc.) and the return value will be `None`. This is a safe way + to generate content into the output document directly, without + having to take care of special values like text or attributes. + Note that the string discarding options will be ignored in this + case. + """ + cdef xmlNode* c_parent + cdef xmlNode* c_node + cdef xmlNode* c_context_node + assert context._xsltCtxt is not NULL, "XSLT context not initialised" + c_context_node = _roNodeOf(node) + #assert c_context_node.doc is context._xsltContext.node.doc, \ + # "switching input documents during transformation is not currently supported" + + if output_parent is not None: + c_parent = _nonRoNodeOf(output_parent) + else: + c_parent = tree.xmlNewDocNode( + context._xsltCtxt.output, NULL, <unsigned char*>"fake-parent", NULL) + + c_node = context._xsltCtxt.insert + context._xsltCtxt.insert = c_parent + xslt.xsltProcessOneNode( + context._xsltCtxt, c_context_node, NULL) + context._xsltCtxt.insert = c_node + + if output_parent is not None: + return None + + try: + return self._collectXSLTResultContent( + context, c_parent, elements_only, remove_blank_text) + finally: + # free all intermediate nodes that will not be freed by proxies + tree.xmlFreeNode(c_parent) + + def process_children(self, _XSLTContext context not None, output_parent=None, + *, elements_only=False, remove_blank_text=False): + """process_children(self, context, output_parent=None, elements_only=False, remove_blank_text=False) + + Call this method to process the XSLT content of the extension + element itself. + + The return value is a list of elements or text strings that + were generated by the XSLT processor. If you pass + ``elements_only=True``, strings will be discarded from the result + list. The option ``remove_blank_text=True`` will only discard + strings that consist entirely of whitespace (e.g. formatting). + These options do not apply to Elements, only to bare string results. + + If you pass an Element as `output_parent` parameter, the result + will instead be appended to the element (including attributes + etc.) and the return value will be `None`. This is a safe way + to generate content into the output document directly, without + having to take care of special values like text or attributes. + Note that the string discarding options will be ignored in this + case. + """ + cdef xmlNode* c_parent + cdef xslt.xsltTransformContext* c_ctxt = context._xsltCtxt + cdef xmlNode* c_old_output_parent = c_ctxt.insert + assert context._xsltCtxt is not NULL, "XSLT context not initialised" + + # output_parent node is used for adding results instead of + # elements list used in apply_templates, that's easier and allows to + # use attributes added to extension element with <xsl:attribute>. + + if output_parent is not None: + c_parent = _nonRoNodeOf(output_parent) + else: + c_parent = tree.xmlNewDocNode( + context._xsltCtxt.output, NULL, <unsigned char*>"fake-parent", NULL) + + c_ctxt.insert = c_parent + xslt.xsltApplyOneTemplate(c_ctxt, + c_ctxt.node, c_ctxt.inst.children, NULL, NULL) + c_ctxt.insert = c_old_output_parent + + if output_parent is not None: + return None + + try: + return self._collectXSLTResultContent( + context, c_parent, elements_only, remove_blank_text) + finally: + # free all intermediate nodes that will not be freed by proxies + tree.xmlFreeNode(c_parent) + + cdef _collectXSLTResultContent(self, _XSLTContext context, xmlNode* c_parent, + bint elements_only, bint remove_blank_text): + cdef xmlNode* c_node + cdef xmlNode* c_next + cdef _ReadOnlyProxy proxy + cdef list results = [] # or maybe _collectAttributes(c_parent, 2) ? + c_node = c_parent.children + while c_node is not NULL: + c_next = c_node.next + if c_node.type == tree.XML_TEXT_NODE: + if not elements_only: + s = funicode(c_node.content) + if not remove_blank_text or s.strip(): + results.append(s) + s = None + elif c_node.type == tree.XML_ELEMENT_NODE: + proxy = _newReadOnlyProxy( + context._extension_element_proxy, c_node) + results.append(proxy) + # unlink node and make sure it will be freed later on + tree.xmlUnlinkNode(c_node) + proxy.free_after_use() + else: + raise TypeError, \ + f"unsupported XSLT result type: {c_node.type}" + c_node = c_next + return results + + +cdef _registerXSLTExtensions(xslt.xsltTransformContext* c_ctxt, + extension_dict): + for ns_utf, name_utf in extension_dict: + xslt.xsltRegisterExtElement( + c_ctxt, _xcstr(name_utf), _xcstr(ns_utf), + <xslt.xsltTransformFunction>_callExtensionElement) + +cdef void _callExtensionElement(xslt.xsltTransformContext* c_ctxt, + xmlNode* c_context_node, + xmlNode* c_inst_node, + void* dummy) noexcept with gil: + cdef _XSLTContext context + cdef XSLTExtension extension + cdef python.PyObject* dict_result + cdef xmlNode* c_node + cdef _ReadOnlyProxy context_node = None, self_node = None + cdef object output_parent # not restricted to ro-nodes + c_uri = _getNs(c_inst_node) + if c_uri is NULL: + # not allowed, and should never happen + return + if c_ctxt.xpathCtxt.userData is NULL: + # just for safety, should never happen + return + context = <_XSLTContext>c_ctxt.xpathCtxt.userData + try: + try: + dict_result = python.PyDict_GetItem( + context._extension_elements, (c_uri, c_inst_node.name)) + if dict_result is NULL: + raise KeyError, f"extension element {funicode(c_inst_node.name)} not found" + extension = <object>dict_result + + try: + # build the context proxy nodes + self_node = _newReadOnlyProxy(None, c_inst_node) + if _isElement(c_ctxt.insert): + output_parent = _newAppendOnlyProxy(self_node, c_ctxt.insert) + else: + # may be the document node or other stuff + output_parent = _newOpaqueAppendOnlyNodeWrapper(c_ctxt.insert) + if c_context_node.type in (tree.XML_DOCUMENT_NODE, + tree.XML_HTML_DOCUMENT_NODE): + c_node = tree.xmlDocGetRootElement(<xmlDoc*>c_context_node) + if c_node is not NULL: + context_node = _newReadOnlyProxy(self_node, c_node) + else: + context_node = None + elif c_context_node.type in (tree.XML_ATTRIBUTE_NODE, + tree.XML_TEXT_NODE, + tree.XML_CDATA_SECTION_NODE): + # this isn't easy to support using read-only + # nodes, as the smart-string factory must + # instantiate the parent proxy somehow... + raise TypeError(f"Unsupported element type: {c_context_node.type}") + else: + context_node = _newReadOnlyProxy(self_node, c_context_node) + + # run the XSLT extension + context._extension_element_proxy = self_node + extension.execute(context, self_node, context_node, output_parent) + finally: + context._extension_element_proxy = None + if self_node is not None: + _freeReadOnlyProxies(self_node) + except Exception as e: + try: + e = unicode(e).encode("UTF-8") + except: + e = repr(e).encode("UTF-8") + message = python.PyBytes_FromFormat( + "Error executing extension element '%s': %s", + c_inst_node.name, _cstr(e)) + xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, "%s", message) + context._exc._store_raised() + except: + # just in case + message = python.PyBytes_FromFormat( + "Error executing extension element '%s'", c_inst_node.name) + xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, "%s", message) + context._exc._store_raised() + except: + # no Python functions here - everything can fail... + xslt.xsltTransformError(c_ctxt, NULL, c_inst_node, + "Error during XSLT extension element evaluation") + context._exc._store_raised() + finally: + return # swallow any further exceptions |