about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
diff options
context:
space:
mode:
author S. Solomon Darnell 2025-03-28 21:52:21 -0500
committer S. Solomon Darnell 2025-03-28 21:52:21 -0500
commit 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
parent cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download gn-ai-master.tar.gz
two version of R2R are here HEAD master
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/classlookup.pxi')
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/classlookup.pxi580
1 files changed, 580 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
new file mode 100644
index 00000000..92d1d47a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
@@ -0,0 +1,580 @@
+# Configurable Element class lookup
+
+################################################################################
+# Custom Element classes
+
+cdef public class ElementBase(_Element) [ type LxmlElementBaseType,
+                                          object LxmlElementBase ]:
+    """ElementBase(*children, attrib=None, nsmap=None, **_extra)
+
+    The public Element class.  All custom Element classes must inherit
+    from this one.  To create an Element, use the `Element()` factory.
+
+    BIG FAT WARNING: Subclasses *must not* override __init__ or
+    __new__ as it is absolutely undefined when these objects will be
+    created or destroyed.  All persistent state of Elements must be
+    stored in the underlying XML.  If you really need to initialize
+    the object after creation, you can implement an ``_init(self)``
+    method that will be called directly after object creation.
+
+    Subclasses of this class can be instantiated to create a new
+    Element.  By default, the tag name will be the class name and the
+    namespace will be empty.  You can modify this with the following
+    class attributes:
+
+    * TAG - the tag name, possibly containing a namespace in Clark
+      notation
+
+    * NAMESPACE - the default namespace URI, unless provided as part
+      of the TAG attribute.
+
+    * HTML - flag if the class is an HTML tag, as opposed to an XML
+      tag.  This only applies to un-namespaced tags and defaults to
+      false (i.e. XML).
+
+    * PARSER - the parser that provides the configuration for the
+      newly created document.  Providing an HTML parser here will
+      default to creating an HTML element.
+
+    In user code, the latter three are commonly inherited in class
+    hierarchies that implement a common namespace.
+    """
+    def __init__(self, *children, attrib=None, nsmap=None, **_extra):
+        """ElementBase(*children, attrib=None, nsmap=None, **_extra)
+
+        Create a new element configured by the TAG, NAMESPACE, HTML and
+        PARSER attributes (where present) and populate it with
+        ``children``.
+
+        Each positional child may be a string, an element instance, or
+        an ElementBase subclass (which is instantiated without
+        arguments).  Any other child type raises TypeError.
+        """
+        cdef bint is_html = 0
+        cdef _BaseParser parser
+        cdef _Element last_child
+        # don't use normal attribute access as it might be overridden
+        _getattr = object.__getattribute__
+        # NAMESPACE is optional; without it there is no default namespace
+        try:
+            namespace = _utf8(_getattr(self, 'NAMESPACE'))
+        except AttributeError:
+            namespace = None
+        try:
+            ns, tag = _getNsTag(_getattr(self, 'TAG'))
+            # a namespace embedded in TAG wins over NAMESPACE
+            if ns is not None:
+                namespace = ns
+        except AttributeError:
+            # no TAG attribute: use the class name, keeping only the part
+            # after the last dot if the name is dotted
+            tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__'))
+            if b'.' in tag:
+                tag = tag.split(b'.')[-1]
+        try:
+            parser = _getattr(self, 'PARSER')
+        except AttributeError:
+            # no PARSER attribute: borrow the parser of the document of the
+            # first child that is an element instance, if there is one
+            parser = None
+            for child in children:
+                if isinstance(child, _Element):
+                    parser = (<_Element>child)._doc._parser
+                    break
+        # an HTML parser selects HTML mode by default ...
+        if isinstance(parser, HTMLParser):
+            is_html = 1
+        # ... but for un-namespaced tags an HTML attribute, if present,
+        # overrides the parser-derived flag
+        if namespace is None:
+            try:
+                is_html = _getattr(self, 'HTML')
+            except AttributeError:
+                pass
+        _initNewElement(self, is_html, tag, namespace, parser,
+                        attrib, nsmap, _extra)
+        # last_child tracks the most recently appended element so that
+        # following strings go to its tail instead of this element's text
+        last_child = None
+        for child in children:
+            if _isString(child):
+                if last_child is None:
+                    # no element child yet: extend this element's text
+                    _setNodeText(self._c_node,
+                                 (_collectText(self._c_node.children) or '') + child)
+                else:
+                    # extend the tail text of the previous element child
+                    _setTailText(last_child._c_node,
+                                 (_collectText(last_child._c_node.next) or '') + child)
+            elif isinstance(child, _Element):
+                last_child = child
+                _appendChild(self, last_child)
+            elif isinstance(child, type) and issubclass(child, ElementBase):
+                # an ElementBase subclass (not an instance) is instantiated
+                # without arguments and appended
+                last_child = child()
+                _appendChild(self, last_child)
+            else:
+                raise TypeError, f"Invalid child type: {type(child)!r}"
+
+cdef class CommentBase(_Comment):
+    """Base class that every custom Comment class must inherit from.
+
+    Use the ``Comment()`` factory to create an XML Comment instance.
+
+    Subclasses *must not* override __init__ or __new__, because it is
+    absolutely undefined when these objects get created or destroyed.
+    Keep all persistent state of Comments in the underlying XML.  If
+    an object really needs initialisation after creation, implement an
+    ``_init(self)`` method; it is called after object creation.
+    """
+    def __init__(self, text):
+        # mirrors the Comment() factory
+        cdef xmlDoc*   c_new_doc
+        cdef _Document new_doc
+        # None means an empty comment; anything else is encoded via _utf8()
+        text = b'' if text is None else _utf8(text)
+        # the comment node lives in a fresh document of its own
+        c_new_doc = _newXMLDoc()
+        new_doc = _documentFactory(c_new_doc, None)
+        self._c_node = _createComment(c_new_doc, _xcstr(text))
+        if self._c_node is NULL:
+            raise MemoryError()
+        tree.xmlAddChild(<xmlNode*>c_new_doc, self._c_node)
+        _registerProxy(self, new_doc, self._c_node)
+        self._init()
+
+cdef class PIBase(_ProcessingInstruction):
+    """Base class that every custom Processing Instruction class must
+    inherit from.
+
+    Use the ``PI()`` factory to create an XML ProcessingInstruction
+    instance.
+
+    Subclasses *must not* override __init__ or __new__, because it is
+    absolutely undefined when these objects get created or destroyed.
+    Keep all persistent state of PIs in the underlying XML.  If an
+    object really needs initialisation after creation, implement an
+    ``_init(self)`` method; it is called after object creation.
+    """
+    def __init__(self, target, text=None):
+        # mirrors the PI() factory
+        cdef xmlDoc*   c_new_doc
+        cdef _Document new_doc
+        target = _utf8(target)
+        # a missing text becomes empty content
+        text = b'' if text is None else _utf8(text)
+        # the PI node lives in a fresh document of its own
+        c_new_doc = _newXMLDoc()
+        new_doc = _documentFactory(c_new_doc, None)
+        self._c_node = _createPI(c_new_doc, _xcstr(target), _xcstr(text))
+        if self._c_node is NULL:
+            raise MemoryError()
+        tree.xmlAddChild(<xmlNode*>c_new_doc, self._c_node)
+        _registerProxy(self, new_doc, self._c_node)
+        self._init()
+
+cdef class EntityBase(_Entity):
+    """Base class that every custom Entity class must inherit from.
+
+    Use the ``Entity()`` factory to create an XML Entity instance.
+
+    Subclasses *must not* override __init__ or __new__, because it is
+    absolutely undefined when these objects get created or destroyed.
+    Keep all persistent state of Entities in the underlying XML.  If
+    an object really needs initialisation after creation, implement an
+    ``_init(self)`` method; it is called after object creation.
+    """
+    def __init__(self, name):
+        cdef xmlDoc*   c_new_doc
+        cdef _Document new_doc
+        name_utf = _utf8(name)
+        c_name = _xcstr(name_utf)
+        # a leading '#' marks a character reference, which is validated
+        # without the '#'; everything else must be a valid XML name
+        if c_name[0] != c'#':
+            if not _xmlNameIsValid(c_name):
+                raise ValueError(f"Invalid entity reference: '{name}'")
+        elif not _characterReferenceIsValid(c_name + 1):
+            raise ValueError(f"Invalid character reference: '{name}'")
+        # the entity node lives in a fresh document of its own
+        c_new_doc = _newXMLDoc()
+        new_doc = _documentFactory(c_new_doc, None)
+        self._c_node = _createEntity(c_new_doc, c_name)
+        if self._c_node is NULL:
+            raise MemoryError()
+        tree.xmlAddChild(<xmlNode*>c_new_doc, self._c_node)
+        _registerProxy(self, new_doc, self._c_node)
+        self._init()
+
+
+cdef int _validateNodeClass(xmlNode* c_node, cls) except -1:
+    """Check that ``cls`` is an acceptable proxy class for ``c_node``.
+
+    The required base class depends on the node type: ElementBase for
+    elements, CommentBase for comments, EntityBase for entity
+    references and PIBase for processing instructions.
+
+    Returns 0 on success.  Raises TypeError if ``cls`` is not a class
+    or not a subclass of the required base class, and AssertionError
+    for any other node type.
+    """
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        expected = ElementBase
+    elif c_node.type == tree.XML_COMMENT_NODE:
+        expected = CommentBase
+    elif c_node.type == tree.XML_ENTITY_REF_NODE:
+        expected = EntityBase
+    elif c_node.type == tree.XML_PI_NODE:
+        expected = PIBase
+    else:
+        # raise explicitly instead of using ``assert``: an assert can be
+        # compiled out, which would leave ``expected`` unbound below
+        raise AssertionError(f"Unknown node type: {c_node.type}")
+
+    if not (isinstance(cls, type) and issubclass(cls, expected)):
+        # report the required base class and the offending object
+        # themselves; type(expected) is only ever the metaclass and says
+        # nothing about what was required
+        raise TypeError(
+            f"result of class lookup must be subclass of {expected!r}, got {cls!r}")
+    return 0
+
+
+################################################################################
+# Element class lookup
+
+# Signature shared by the lookup functions in this file: they receive a
+# state object, the document and the C node, and return a Python object
+# (the class to use for the node).
+ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*)
+
+# class to store element class lookup functions
+cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
+                                       object LxmlElementClassLookup ]:
+    """ElementClassLookup(self)
+    Superclass of Element class lookups.
+    """
+    # C function implementing this lookup scheme.  The base class never
+    # assigns it; the subclasses in this file set it in __cinit__, and
+    # users of the field check it against NULL before calling it.
+    cdef _element_class_lookup_function _lookup_function
+
+
+cdef public class FallbackElementClassLookup(ElementClassLookup) \
+         [ type LxmlFallbackElementClassLookupType,
+           object LxmlFallbackElementClassLookup ]:
+    """FallbackElementClassLookup(self, fallback=None)
+
+    Base class for Element class lookups that can delegate to a
+    fallback lookup.
+    """
+    # the fallback lookup object, exposed read-only to Python
+    cdef readonly ElementClassLookup fallback
+    # the C function that is called when falling back
+    cdef _element_class_lookup_function _fallback_function
+    def __cinit__(self):
+        # the default lookup is the fallback until one is configured
+        self._fallback_function = _lookupDefaultElementClass
+
+    def __init__(self, ElementClassLookup fallback=None):
+        if fallback is None:
+            self._fallback_function = _lookupDefaultElementClass
+            return
+        self._setFallback(fallback)
+
+    cdef void _setFallback(self, ElementClassLookup lookup):
+        """Install ``lookup`` as the fallback scheme of this lookup.
+        """
+        cdef _element_class_lookup_function lookup_function = lookup._lookup_function
+        # a lookup without a function of its own falls back to the default
+        if lookup_function is NULL:
+            lookup_function = _lookupDefaultElementClass
+        self.fallback = lookup
+        self._fallback_function = lookup_function
+
+    def set_fallback(self, ElementClassLookup lookup not None):
+        """set_fallback(self, lookup)
+
+        Install ``lookup`` as the fallback scheme of this lookup.
+        """
+        self._setFallback(lookup)
+
+# Delegate the class lookup for ``c_node`` to the fallback configured on
+# ``lookup``: calls its fallback function with the fallback object as the
+# state argument and returns whatever that function returns.
+cdef inline object _callLookupFallback(FallbackElementClassLookup lookup,
+                                       _Document doc, xmlNode* c_node):
+    return lookup._fallback_function(lookup.fallback, doc, c_node)
+
+
+################################################################################
+# default lookup scheme
+
+cdef class ElementDefaultClassLookup(ElementClassLookup):
+    """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)
+    Lookup scheme that returns one fixed class per kind of node.
+
+    The respective classes can be passed through the keyword arguments
+    ``element``, ``comment``, ``pi`` and ``entity``.  Each must be a
+    subclass of the matching base class (ElementBase, CommentBase,
+    PIBase, EntityBase), otherwise TypeError is raised.
+    """
+    cdef readonly object element_class
+    cdef readonly object comment_class
+    cdef readonly object pi_class
+    cdef readonly object entity_class
+    def __cinit__(self):
+        self._lookup_function = _lookupDefaultElementClass
+
+    def __init__(self, element=None, comment=None, pi=None, entity=None):
+        # validate and store one argument at a time, in this order, so an
+        # invalid argument raises before any later attribute is assigned
+        if element is not None and not issubclass(element, ElementBase):
+            raise TypeError("element class must be subclass of ElementBase")
+        self.element_class = _Element if element is None else element
+
+        if comment is not None and not issubclass(comment, CommentBase):
+            raise TypeError("comment class must be subclass of CommentBase")
+        self.comment_class = _Comment if comment is None else comment
+
+        if entity is not None and not issubclass(entity, EntityBase):
+            raise TypeError("Entity class must be subclass of EntityBase")
+        self.entity_class = _Entity if entity is None else entity
+
+        if pi is not None and not issubclass(pi, PIBase):
+            raise TypeError("PI class must be subclass of PIBase")
+        # None is stored as is: the lookup function treats a missing PI
+        # class as a special case
+        self.pi_class = pi
+
+cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
+    """Trivial class lookup function that always returns the default class.
+
+    ``state`` supplies the configured classes when it is an
+    ElementDefaultClassLookup.  For any other state, including None,
+    the built-in classes are returned.  ``_doc`` is not used.
+
+    Raises AssertionError for node types other than element, comment,
+    entity reference and processing instruction.
+    """
+    cdef ElementDefaultClassLookup lookup = None
+    # Only read the configured classes from a real ElementDefaultClassLookup.
+    # A fallback lookup keeps a fallback object that has no lookup function
+    # of its own as the state while substituting this function, so the state
+    # may be some other ElementClassLookup; an unchecked cast of such an
+    # object would read fields it does not have.
+    if isinstance(state, ElementDefaultClassLookup):
+        lookup = <ElementDefaultClassLookup>state
+
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        if lookup is not None:
+            return lookup.element_class
+        else:
+            return _Element
+    elif c_node.type == tree.XML_COMMENT_NODE:
+        if lookup is not None:
+            return lookup.comment_class
+        else:
+            return _Comment
+    elif c_node.type == tree.XML_ENTITY_REF_NODE:
+        if lookup is not None:
+            return lookup.entity_class
+        else:
+            return _Entity
+    elif c_node.type == tree.XML_PI_NODE:
+        if lookup is None or lookup.pi_class is None:
+            # special case XSLT-PI: an "xml-stylesheet" PI whose content
+            # mentions "text/xsl" or "text/xml" gets the XSLT PI class
+            if c_node.name is not NULL and c_node.content is not NULL:
+                if tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0:
+                    if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL or \
+                           tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL:
+                        return _XSLTProcessingInstruction
+            return _ProcessingInstruction
+        else:
+            return lookup.pi_class
+    else:
+        assert False, f"Unknown node type: {c_node.type}"
+
+
+################################################################################
+# attribute based lookup scheme
+
+cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
+    """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)
+    Checks an attribute of an Element and looks up the value in a
+    class dictionary.
+
+    Arguments:
+      - attribute name - '{ns}name' style string
+      - class mapping  - Python dict mapping attribute values to Element classes
+      - fallback       - optional fallback lookup mechanism
+
+    A None key in the class mapping will be checked if the attribute is
+    missing.
+    """
+    # private dict copy of the class mapping passed to __init__
+    cdef object _class_mapping
+    # the (ns, name) pair that _getNsTag() returned for the attribute name
+    # NOTE(review): presumably kept so that the buffers behind _c_ns and
+    # _c_name stay alive -- confirm against _xcstr()
+    cdef tuple _pytag
+    # C-level namespace (NULL if the attribute has none) and local name
+    cdef const_xmlChar* _c_ns
+    cdef const_xmlChar* _c_name
+    def __cinit__(self):
+        self._lookup_function = _attribute_class_lookup
+
+    def __init__(self, attribute_name, class_mapping,
+                 ElementClassLookup fallback=None):
+        self._pytag = _getNsTag(attribute_name)
+        ns, name = self._pytag
+        if ns is None:
+            self._c_ns = NULL
+        else:
+            self._c_ns = _xcstr(ns)
+        self._c_name = _xcstr(name)
+        # copy the mapping so later changes by the caller have no effect
+        self._class_mapping = dict(class_mapping)
+
+        FallbackElementClassLookup.__init__(self, fallback)
+
+cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node):
+    """Lookup function of AttributeBasedElementClassLookup.
+
+    For element nodes, the value of the configured attribute is looked
+    up in the class mapping.  A hit is validated against the node type
+    and returned.  Non-element nodes and values without a mapping
+    entry are delegated to the fallback.
+    """
+    cdef AttributeBasedElementClassLookup attr_lookup
+    cdef python.PyObject* mapped
+
+    attr_lookup = <AttributeBasedElementClassLookup>state
+    if c_node.type != tree.XML_ELEMENT_NODE:
+        return _callLookupFallback(attr_lookup, doc, c_node)
+
+    attr_value = _attributeValueFromNsName(
+        c_node, attr_lookup._c_ns, attr_lookup._c_name)
+    mapped = python.PyDict_GetItem(attr_lookup._class_mapping, attr_value)
+    if mapped is NULL:
+        # no class registered for this attribute value
+        return _callLookupFallback(attr_lookup, doc, c_node)
+
+    cls = <object>mapped
+    _validateNodeClass(c_node, cls)
+    return cls
+
+
+################################################################################
+#  per-parser lookup scheme
+
+cdef class ParserBasedElementClassLookup(FallbackElementClassLookup):
+    """ParserBasedElementClassLookup(self, fallback=None)
+    Element class lookup based on the XML parser.
+    """
+    def __cinit__(self):
+        # the delegation to the parser's lookup happens in this function
+        self._lookup_function = _parser_class_lookup
+
+cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node):
+    """Lookup function of ParserBasedElementClassLookup.
+
+    Uses the class lookup configured on the parser of ``doc`` if there
+    is one, and the fallback of ``state`` otherwise.
+    """
+    cdef ElementClassLookup parser_lookup = doc._parser._class_lookup
+    if parser_lookup is None:
+        return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node)
+    return parser_lookup._lookup_function(parser_lookup, doc, c_node)
+
+
+################################################################################
+#  custom class lookup based on node type, namespace, name
+
+cdef class CustomElementClassLookup(FallbackElementClassLookup):
+    """CustomElementClassLookup(self, fallback=None)
+    Element class lookup based on a subclass method.
+
+    You can inherit from this class and override the method::
+
+        lookup(self, type, doc, namespace, name)
+
+    to lookup the element class for a node. Arguments of the method:
+    * type:      one of 'element', 'comment', 'PI', 'entity'
+    * doc:       document that the node is in
+    * namespace: namespace URI of the node (or None for comments/PIs/entities)
+    * name:      name of the element/entity, None for comments, target for PIs
+
+    If you return None from this method, the fallback will be called.
+    """
+    def __cinit__(self):
+        # this function calls self.lookup() and handles the fallback
+        self._lookup_function = _custom_class_lookup
+
+    def lookup(self, type, doc, namespace, name):
+        """lookup(self, type, doc, namespace, name)
+
+        Override this method to implement your own lookup scheme.
+        This default implementation returns None for every node.
+        """
+        return None
+
+cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node):
+    """Lookup function of CustomElementClassLookup.
+
+    Calls the ``lookup()`` method of ``state`` with the kind of node,
+    the document, the namespace URI and the node name.  A class that
+    it returns is validated against the node type and returned.  A
+    None result is delegated to the fallback.
+    """
+    cdef CustomElementClassLookup custom_lookup = <CustomElementClassLookup>state
+
+    # anything that is not a comment, PI or entity reference is
+    # reported as "element"
+    if c_node.type == tree.XML_COMMENT_NODE:
+        node_kind = "comment"
+    elif c_node.type == tree.XML_PI_NODE:
+        node_kind = "PI"
+    elif c_node.type == tree.XML_ENTITY_REF_NODE:
+        node_kind = "entity"
+    else:
+        node_kind = "element"
+
+    node_name = None if c_node.name is NULL else funicode(c_node.name)
+    c_href = tree._getNs(c_node)
+    namespace = None if c_href is NULL else funicode(c_href)
+
+    cls = custom_lookup.lookup(node_kind, doc, namespace, node_name)
+    if cls is None:
+        return _callLookupFallback(custom_lookup, doc, c_node)
+    _validateNodeClass(c_node, cls)
+    return cls
+
+
+################################################################################
+# read-only tree based class lookup
+
+cdef class PythonElementClassLookup(FallbackElementClassLookup):
+    """PythonElementClassLookup(self, fallback=None)
+    Element class lookup based on a subclass method.
+
+    This class lookup scheme allows access to the entire XML tree in
+    read-only mode.  To use it, re-implement the ``lookup(self, doc,
+    root)`` method in a subclass::
+
+        from lxml import etree, pyclasslookup
+
+        class MyElementClass(etree.ElementBase):
+            honkey = True
+
+        class MyLookup(pyclasslookup.PythonElementClassLookup):
+            def lookup(self, doc, root):
+                if root.tag == "sometag":
+                    return MyElementClass
+                else:
+                    for child in root:
+                        if child.tag == "someothertag":
+                            return MyElementClass
+                # delegate to default
+                return None
+
+    If you return None from this method, the fallback will be called.
+
+    The first argument is the opaque document instance that contains
+    the Element.  The second argument is a lightweight Element proxy
+    implementation that is only valid during the lookup.  Do not try
+    to keep a reference to it.  Once the lookup is done, the proxy
+    will be invalid.
+
+    Also, you cannot wrap such a read-only Element in an ElementTree,
+    and you must take care not to keep a reference to them outside of
+    the `lookup()` method.
+
+    Note that the API of the Element objects is not complete.  It is
+    purely read-only and does not support all features of the normal
+    `lxml.etree` API (such as XPath, extended slicing or some
+    iteration methods).
+
+    See https://lxml.de/element_classes.html
+    """
+    def __cinit__(self):
+        # this function builds the read-only proxy and calls self.lookup()
+        self._lookup_function = _python_class_lookup
+
+    def lookup(self, doc, element):
+        """lookup(self, doc, element)
+
+        Override this method to implement your own lookup scheme.
+        This default implementation returns None for every node.
+        """
+        return None
+
+cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node):
+    """Lookup function of PythonElementClassLookup.
+
+    Wraps ``c_node`` in a read-only proxy and passes it, together with
+    ``doc``, to the ``lookup()`` method of ``state``.  A class that it
+    returns is validated against the node type and returned.  A None
+    result is delegated to the fallback.
+
+    Exceptions raised by ``lookup()`` propagate to the caller.
+    """
+    cdef PythonElementClassLookup lookup
+    cdef _ReadOnlyProxy proxy
+    lookup = <PythonElementClassLookup>state
+
+    proxy = _newReadOnlyProxy(None, c_node)
+    try:
+        cls = lookup.lookup(doc, proxy)
+    finally:
+        # The proxy is only meant to be valid during the lookup, so release
+        # it even if the user's lookup() raised.
+        _freeReadOnlyProxies(proxy)
+
+    if cls is not None:
+        _validateNodeClass(c_node, cls)
+        return cls
+    return _callLookupFallback(lookup, doc, c_node)
+
+################################################################################
+# Global setup
+
+# The globally active lookup function and its state object; both are
+# assigned together by _setElementClassLookupFunction().
+# NOTE(review): the state is presumably passed back to the function as
+# its first argument by the callers -- confirm where they are used.
+cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS
+cdef object ELEMENT_CLASS_LOOKUP_STATE
+
+cdef void _setElementClassLookupFunction(
+    _element_class_lookup_function function, object state):
+    # Install ``function`` and ``state`` as the global lookup.  A NULL
+    # function selects the default lookup and its function instead; the
+    # ``state`` argument is ignored in that case.
+    global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE
+    if function is not NULL:
+        ELEMENT_CLASS_LOOKUP_STATE = state
+        LOOKUP_ELEMENT_CLASS = function
+        return
+
+    ELEMENT_CLASS_LOOKUP_STATE = DEFAULT_ELEMENT_CLASS_LOOKUP
+    LOOKUP_ELEMENT_CLASS = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function
+
+def set_element_class_lookup(ElementClassLookup lookup = None):
+    """set_element_class_lookup(lookup = None)
+
+    Set the global element class lookup method.
+
+    This is the main entry point for looking up element implementations.
+    The standard implementation uses the :class:`ParserBasedElementClassLookup`
+    to delegate to different lookup schemes for each parser.  Passing
+    None, or a lookup that has no lookup function, restores that default.
+
+    .. warning::
+
+        This should only be changed by applications, not by library packages.
+        In most cases, parser specific lookups should be preferred,
+        which can be configured via
+        :meth:`~lxml.etree.XMLParser.set_element_class_lookup`
+        (and the same for HTML parsers).
+
+        Globally replacing the element class lookup by something other than a
+        :class:`ParserBasedElementClassLookup` will prevent parser specific lookup
+        schemes from working. Several tools rely on parser specific lookups,
+        including :mod:`lxml.html` and :mod:`lxml.objectify`.
+    """
+    if lookup is not None and lookup._lookup_function is not NULL:
+        _setElementClassLookupFunction(lookup._lookup_function, lookup)
+    else:
+        # a NULL function makes the setter install the default lookup
+        _setElementClassLookupFunction(NULL, None)
+
+# default setup: parser delegation
+# The lookup that is installed when no other lookup is set; it also
+# replaces a None or function-less lookup in set_element_class_lookup().
+cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP
+DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup()
+
+# install it as the global lookup when this code is executed
+set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP)