aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/classlookup.pxi')
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/classlookup.pxi580
1 files changed, 580 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
new file mode 100644
index 00000000..92d1d47a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi
@@ -0,0 +1,580 @@
+# Configurable Element class lookup
+
+################################################################################
+# Custom Element classes
+
+cdef public class ElementBase(_Element) [ type LxmlElementBaseType,
+ object LxmlElementBase ]:
+ """ElementBase(*children, attrib=None, nsmap=None, **_extra)
+
+ The public Element class. All custom Element classes must inherit
+ from this one. To create an Element, use the `Element()` factory.
+
+ BIG FAT WARNING: Subclasses *must not* override __init__ or
+ __new__ as it is absolutely undefined when these objects will be
+ created or destroyed. All persistent state of Elements must be
+ stored in the underlying XML. If you really need to initialize
+ the object after creation, you can implement an ``_init(self)``
+ method that will be called directly after object creation.
+
+ Subclasses of this class can be instantiated to create a new
+ Element. By default, the tag name will be the class name and the
+ namespace will be empty. You can modify this with the following
+ class attributes:
+
+ * TAG - the tag name, possibly containing a namespace in Clark
+ notation
+
+ * NAMESPACE - the default namespace URI, unless provided as part
+ of the TAG attribute.
+
+ * HTML - flag if the class is an HTML tag, as opposed to an XML
+ tag. This only applies to un-namespaced tags and defaults to
+ false (i.e. XML).
+
+ * PARSER - the parser that provides the configuration for the
+ newly created document. Providing an HTML parser here will
+ default to creating an HTML element.
+
+ In user code, the latter three are commonly inherited in class
+ hierarchies that implement a common namespace.
+ """
+ def __init__(self, *children, attrib=None, nsmap=None, **_extra):
+ """ElementBase(*children, attrib=None, nsmap=None, **_extra)
+
+ Creates a new Element for this class. Each item in ``children``
+ may be a string (as determined by ``_isString()``), an Element
+ (appended as a child) or an ElementBase subclass (instantiated
+ without arguments and appended). Any other child type raises
+ TypeError. ``attrib``, ``nsmap`` and ``_extra`` are passed on to
+ ``_initNewElement()``.
+ """
+ cdef bint is_html = 0
+ cdef _BaseParser parser
+ cdef _Element last_child
+ # don't use normal attribute access as it might be overridden
+ _getattr = object.__getattribute__
+ # NAMESPACE is optional; a missing attribute means "no namespace"
+ try:
+ namespace = _utf8(_getattr(self, 'NAMESPACE'))
+ except AttributeError:
+ namespace = None
+ # a namespace embedded in TAG overrides NAMESPACE; without a TAG
+ # attribute, the last dot-separated part of the class name is used
+ try:
+ ns, tag = _getNsTag(_getattr(self, 'TAG'))
+ if ns is not None:
+ namespace = ns
+ except AttributeError:
+ tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__'))
+ if b'.' in tag:
+ tag = tag.split(b'.')[-1]
+ # no PARSER configured: borrow the parser of the first Element child, if any
+ try:
+ parser = _getattr(self, 'PARSER')
+ except AttributeError:
+ parser = None
+ for child in children:
+ if isinstance(child, _Element):
+ parser = (<_Element>child)._doc._parser
+ break
+ if isinstance(parser, HTMLParser):
+ is_html = 1
+ # the HTML class attribute is only consulted for un-namespaced tags
+ if namespace is None:
+ try:
+ is_html = _getattr(self, 'HTML')
+ except AttributeError:
+ pass
+ _initNewElement(self, is_html, tag, namespace, parser,
+ attrib, nsmap, _extra)
+ # strings are added to this element's text until the first child has
+ # been appended, and to the tail text of the latest child afterwards
+ last_child = None
+ for child in children:
+ if _isString(child):
+ if last_child is None:
+ _setNodeText(self._c_node,
+ (_collectText(self._c_node.children) or '') + child)
+ else:
+ _setTailText(last_child._c_node,
+ (_collectText(last_child._c_node.next) or '') + child)
+ elif isinstance(child, _Element):
+ last_child = child
+ _appendChild(self, last_child)
+ elif isinstance(child, type) and issubclass(child, ElementBase):
+ # an ElementBase subclass (not an instance): create it with no arguments
+ last_child = child()
+ _appendChild(self, last_child)
+ else:
+ raise TypeError, f"Invalid child type: {type(child)!r}"
+
+cdef class CommentBase(_Comment):
+ """All custom Comment classes must inherit from this one.
+
+ To create an XML Comment instance, use the ``Comment()`` factory.
+
+ Subclasses *must not* override __init__ or __new__ as it is
+ absolutely undefined when these objects will be created or
+ destroyed. All persistent state of Comments must be stored in the
+ underlying XML. If you really need to initialize the object after
+ creation, you can implement an ``_init(self)`` method that will be
+ called after object creation.
+ """
+ def __init__(self, text):
+ # copied from Comment() factory
+ cdef _Document doc
+ cdef xmlDoc* c_doc
+ # None is treated as an empty comment; anything else goes through _utf8()
+ if text is None:
+ text = b''
+ else:
+ text = _utf8(text)
+ # the comment node is created in a new document of its own
+ c_doc = _newXMLDoc()
+ doc = _documentFactory(c_doc, None)
+ self._c_node = _createComment(c_doc, _xcstr(text))
+ if self._c_node is NULL:
+ raise MemoryError()
+ # attach the node to the new document, then register this object as its proxy
+ tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
+ _registerProxy(self, doc, self._c_node)
+ # subclass initialisation hook described in the class docstring
+ self._init()
+
+cdef class PIBase(_ProcessingInstruction):
+ """All custom Processing Instruction classes must inherit from this one.
+
+ To create an XML ProcessingInstruction instance, use the ``PI()``
+ factory.
+
+ Subclasses *must not* override __init__ or __new__ as it is
+ absolutely undefined when these objects will be created or
+ destroyed. All persistent state of PIs must be stored in the
+ underlying XML. If you really need to initialize the object after
+ creation, you can implement an ``_init(self)`` method that will be
+ called after object creation.
+ """
+ def __init__(self, target, text=None):
+ # copied from PI() factory
+ cdef _Document doc
+ cdef xmlDoc* c_doc
+ target = _utf8(target)
+ # a missing text is treated as empty PI content
+ if text is None:
+ text = b''
+ else:
+ text = _utf8(text)
+ # the PI node is created in a new document of its own
+ c_doc = _newXMLDoc()
+ doc = _documentFactory(c_doc, None)
+ self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text))
+ if self._c_node is NULL:
+ raise MemoryError()
+ # attach the node to the new document, then register this object as its proxy
+ tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
+ _registerProxy(self, doc, self._c_node)
+ # subclass initialisation hook described in the class docstring
+ self._init()
+
+cdef class EntityBase(_Entity):
+ """All custom Entity classes must inherit from this one.
+
+ To create an XML Entity instance, use the ``Entity()`` factory.
+
+ Subclasses *must not* override __init__ or __new__ as it is
+ absolutely undefined when these objects will be created or
+ destroyed. All persistent state of Entities must be stored in the
+ underlying XML. If you really need to initialize the object after
+ creation, you can implement an ``_init(self)`` method that will be
+ called after object creation.
+ """
+ def __init__(self, name):
+ cdef _Document doc
+ cdef xmlDoc* c_doc
+ name_utf = _utf8(name)
+ c_name = _xcstr(name_utf)
+ # a leading '#' marks a character reference, which is validated without
+ # the '#'; every other name must pass the XML name check
+ if c_name[0] == c'#':
+ if not _characterReferenceIsValid(c_name + 1):
+ raise ValueError, f"Invalid character reference: '{name}'"
+ elif not _xmlNameIsValid(c_name):
+ raise ValueError, f"Invalid entity reference: '{name}'"
+ # the entity node is created in a new document of its own
+ c_doc = _newXMLDoc()
+ doc = _documentFactory(c_doc, None)
+ self._c_node = _createEntity(c_doc, c_name)
+ if self._c_node is NULL:
+ raise MemoryError()
+ # attach the node to the new document, then register this object as its proxy
+ tree.xmlAddChild(<xmlNode*>c_doc, self._c_node)
+ _registerProxy(self, doc, self._c_node)
+ # subclass initialisation hook described in the class docstring
+ self._init()
+
+
+cdef int _validateNodeClass(xmlNode* c_node, cls) except -1:
+    """Check that ``cls`` is an acceptable proxy class for ``c_node``.
+
+    Element nodes require a subclass of ElementBase, comment nodes of
+    CommentBase, entity reference nodes of EntityBase and processing
+    instruction nodes of PIBase.
+
+    Returns 0 on success.  Raises TypeError if ``cls`` is not a class or
+    is not a subclass of the base class required for the node type.
+    """
+    if c_node.type == tree.XML_ELEMENT_NODE:
+        expected = ElementBase
+    elif c_node.type == tree.XML_COMMENT_NODE:
+        expected = CommentBase
+    elif c_node.type == tree.XML_ENTITY_REF_NODE:
+        expected = EntityBase
+    elif c_node.type == tree.XML_PI_NODE:
+        expected = PIBase
+    else:
+        assert False, f"Unknown node type: {c_node.type}"
+
+    if not (isinstance(cls, type) and issubclass(cls, expected)):
+        # Report the required base class itself: type(expected) is only the
+        # metaclass and tells the user nothing.  Likewise show a returned
+        # class as such, and fall back to the type only for non-class results.
+        got = cls if isinstance(cls, type) else type(cls)
+        raise TypeError(
+            f"result of class lookup must be subclass of {expected}, got {got}")
+    return 0
+
+
+################################################################################
+# Element class lookup
+
+# C signature shared by all lookup functions in this file: they receive a
+# state object, the document and the libxml2 node, and return a Python object
+ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*)
+
+# class to store element class lookup functions
+cdef public class ElementClassLookup [ type LxmlElementClassLookupType,
+ object LxmlElementClassLookup ]:
+ """ElementClassLookup(self)
+ Superclass of Element class lookups.
+ """
+ # function pointer implementing the lookup; the subclasses in this file
+ # assign it in their __cinit__
+ cdef _element_class_lookup_function _lookup_function
+
+
+cdef public class FallbackElementClassLookup(ElementClassLookup) \
+         [ type LxmlFallbackElementClassLookupType,
+           object LxmlFallbackElementClassLookup ]:
+    """FallbackElementClassLookup(self, fallback=None)
+
+    Base class for Element class lookups that can delegate to another
+    lookup (the "fallback") when they do not find a class themselves.
+    """
+    # the configured fallback lookup object, or None if none was set
+    cdef readonly ElementClassLookup fallback
+    # lookup function that is called when delegating to the fallback
+    cdef _element_class_lookup_function _fallback_function
+
+    def __cinit__(self):
+        # start out with the default class lookup as fallback
+        self._fallback_function = _lookupDefaultElementClass
+
+    def __init__(self, ElementClassLookup fallback=None):
+        if fallback is None:
+            self._fallback_function = _lookupDefaultElementClass
+            return
+        self._setFallback(fallback)
+
+    cdef void _setFallback(self, ElementClassLookup lookup):
+        """Installs ``lookup`` as the fallback of this lookup scheme.
+
+        If ``lookup`` has no lookup function, the default class lookup
+        function is used in its place.
+        """
+        cdef _element_class_lookup_function function
+        self.fallback = lookup
+        function = lookup._lookup_function
+        if function is NULL:
+            function = _lookupDefaultElementClass
+        self._fallback_function = function
+
+    def set_fallback(self, ElementClassLookup lookup not None):
+        """set_fallback(self, lookup)
+
+        Installs ``lookup`` as the fallback of this lookup scheme.
+        """
+        self._setFallback(lookup)
+
+cdef inline object _callLookupFallback(FallbackElementClassLookup lookup,
+                                       _Document doc, xmlNode* c_node):
+    # The fallback lookup object is handed to its own lookup function as
+    # the state argument.
+    fallback_state = lookup.fallback
+    return lookup._fallback_function(fallback_state, doc, c_node)
+
+
+################################################################################
+# default lookup scheme
+
+cdef class ElementDefaultClassLookup(ElementClassLookup):
+ """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None)
+ Element class lookup scheme that always returns the default Element
+ class.
+
+ The keyword arguments ``element``, ``comment``, ``pi`` and ``entity``
+ accept the respective Element classes.
+ """
+ # the classes that _lookupDefaultElementClass() returns per node type
+ cdef readonly object element_class
+ cdef readonly object comment_class
+ cdef readonly object pi_class
+ cdef readonly object entity_class
+ def __cinit__(self):
+ self._lookup_function = _lookupDefaultElementClass
+
+ def __init__(self, element=None, comment=None, pi=None, entity=None):
+ # Each argument is either None (use the built-in class) or must be a
+ # subclass of the matching *Base class; otherwise TypeError is raised.
+ if element is None:
+ self.element_class = _Element
+ elif issubclass(element, ElementBase):
+ self.element_class = element
+ else:
+ raise TypeError, "element class must be subclass of ElementBase"
+
+ if comment is None:
+ self.comment_class = _Comment
+ elif issubclass(comment, CommentBase):
+ self.comment_class = comment
+ else:
+ raise TypeError, "comment class must be subclass of CommentBase"
+
+ if entity is None:
+ self.entity_class = _Entity
+ elif issubclass(entity, EntityBase):
+ self.entity_class = entity
+ else:
+ raise TypeError, "Entity class must be subclass of EntityBase"
+
+ # Unlike the other classes, a missing PI class is stored as None so
+ # that _lookupDefaultElementClass() can apply its own PI handling.
+ if pi is None:
+ self.pi_class = None # special case, see below
+ elif issubclass(pi, PIBase):
+ self.pi_class = pi
+ else:
+ raise TypeError, "PI class must be subclass of PIBase"
+
+cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node):
+ "Trivial class lookup function that always returns the default class."
+ # state is cast to ElementDefaultClassLookup without a check, so it is
+ # expected to be either None or an instance of that class. With None,
+ # the built-in classes are returned.
+ if c_node.type == tree.XML_ELEMENT_NODE:
+ if state is not None:
+ return (<ElementDefaultClassLookup>state).element_class
+ else:
+ return _Element
+ elif c_node.type == tree.XML_COMMENT_NODE:
+ if state is not None:
+ return (<ElementDefaultClassLookup>state).comment_class
+ else:
+ return _Comment
+ elif c_node.type == tree.XML_ENTITY_REF_NODE:
+ if state is not None:
+ return (<ElementDefaultClassLookup>state).entity_class
+ else:
+ return _Entity
+ elif c_node.type == tree.XML_PI_NODE:
+ if state is None or (<ElementDefaultClassLookup>state).pi_class is None:
+ # special case XSLT-PI
+ # an "xml-stylesheet" PI whose content contains "text/xsl" or
+ # "text/xml" gets the XSLT PI class, all others the plain PI class
+ if c_node.name is not NULL and c_node.content is not NULL:
+ if tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0:
+ if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL or \
+ tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL:
+ return _XSLTProcessingInstruction
+ return _ProcessingInstruction
+ else:
+ return (<ElementDefaultClassLookup>state).pi_class
+ else:
+ assert False, f"Unknown node type: {c_node.type}"
+
+
+################################################################################
+# attribute based lookup scheme
+
+cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup):
+ """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None)
+ Checks an attribute of an Element and looks up the value in a
+ class dictionary.
+
+ Arguments:
+ - attribute name - '{ns}name' style string
+ - class mapping - Python dict mapping attribute values to Element classes
+ - fallback - optional fallback lookup mechanism
+
+ A None key in the class mapping will be checked if the attribute is
+ missing.
+ """
+ # private copy of the user supplied mapping (see __init__)
+ cdef object _class_mapping
+ # (namespace, name) pair as returned by _getNsTag()
+ cdef tuple _pytag
+ # C string views of the two _pytag items, obtained through _xcstr()
+ # NOTE(review): presumably these point into the objects held by _pytag,
+ # which would be what keeps them valid - confirm against _xcstr()
+ cdef const_xmlChar* _c_ns
+ cdef const_xmlChar* _c_name
+ def __cinit__(self):
+ self._lookup_function = _attribute_class_lookup
+
+ def __init__(self, attribute_name, class_mapping,
+ ElementClassLookup fallback=None):
+ self._pytag = _getNsTag(attribute_name)
+ ns, name = self._pytag
+ # an attribute without namespace is represented by a NULL namespace pointer
+ if ns is None:
+ self._c_ns = NULL
+ else:
+ self._c_ns = _xcstr(ns)
+ self._c_name = _xcstr(name)
+ # dict() builds a new dictionary, so later changes to the caller's
+ # mapping do not affect this lookup
+ self._class_mapping = dict(class_mapping)
+
+ FallbackElementClassLookup.__init__(self, fallback)
+
+cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node):
+ # Lookup function of AttributeBasedElementClassLookup: maps the value of
+ # the configured attribute to a class and delegates to the fallback if
+ # the node is not an element or the value is not in the mapping.
+ cdef AttributeBasedElementClassLookup lookup
+ cdef python.PyObject* dict_result
+
+ lookup = <AttributeBasedElementClassLookup>state
+ if c_node.type == tree.XML_ELEMENT_NODE:
+ # NOTE(review): presumably value is None when the attribute is missing,
+ # which is how the documented None key gets matched - confirm against
+ # _attributeValueFromNsName()
+ value = _attributeValueFromNsName(
+ c_node, lookup._c_ns, lookup._c_name)
+ # a NULL result means that the mapping has no entry for this value
+ dict_result = python.PyDict_GetItem(lookup._class_mapping, value)
+ if dict_result is not NULL:
+ cls = <object>dict_result
+ _validateNodeClass(c_node, cls)
+ return cls
+ return _callLookupFallback(lookup, doc, c_node)
+
+
+################################################################################
+# per-parser lookup scheme
+
+cdef class ParserBasedElementClassLookup(FallbackElementClassLookup):
+ """ParserBasedElementClassLookup(self, fallback=None)
+ Element class lookup based on the XML parser.
+ """
+ def __cinit__(self):
+ # _parser_class_lookup() uses the lookup configured on the document's
+ # parser and otherwise calls the fallback of this object
+ self._lookup_function = _parser_class_lookup
+
+cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node):
+    # Without a class lookup on the document's parser, delegate to the
+    # fallback of the ParserBasedElementClassLookup passed in as state.
+    if doc._parser._class_lookup is None:
+        return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node)
+    # Otherwise, run the parser's own lookup with itself as state.
+    return doc._parser._class_lookup._lookup_function(
+        doc._parser._class_lookup, doc, c_node)
+
+
+################################################################################
+# custom class lookup based on node type, namespace, name
+
+cdef class CustomElementClassLookup(FallbackElementClassLookup):
+ """CustomElementClassLookup(self, fallback=None)
+ Element class lookup based on a subclass method.
+
+ You can inherit from this class and override the method::
+
+ lookup(self, type, doc, namespace, name)
+
+ to lookup the element class for a node. Arguments of the method:
+ * type: one of 'element', 'comment', 'PI', 'entity'
+ * doc: document that the node is in
+ * namespace: namespace URI of the node (or None for comments/PIs/entities)
+ * name: name of the element/entity, None for comments, target for PIs
+
+ If you return None from this method, the fallback will be called.
+ """
+ def __cinit__(self):
+ self._lookup_function = _custom_class_lookup
+
+ def lookup(self, type, doc, namespace, name):
+ "lookup(self, type, doc, namespace, name)"
+ # default implementation: finds nothing, so that
+ # _custom_class_lookup() always delegates to the fallback
+ return None
+
+cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node):
+    # Lookup function of CustomElementClassLookup: describes the node in
+    # Python terms, asks the user's lookup() method for a class and
+    # delegates to the fallback if that returns None.
+    cdef CustomElementClassLookup lookup = <CustomElementClassLookup>state
+
+    if c_node.type == tree.XML_COMMENT_NODE:
+        element_type = "comment"
+    elif c_node.type == tree.XML_PI_NODE:
+        element_type = "PI"
+    elif c_node.type == tree.XML_ENTITY_REF_NODE:
+        element_type = "entity"
+    else:
+        # element nodes and every other node type are reported as "element"
+        element_type = "element"
+
+    name = None if c_node.name is NULL else funicode(c_node.name)
+    c_str = tree._getNs(c_node)
+    ns = None if c_str is NULL else funicode(c_str)
+
+    cls = lookup.lookup(element_type, doc, ns, name)
+    if cls is None:
+        return _callLookupFallback(lookup, doc, c_node)
+    _validateNodeClass(c_node, cls)
+    return cls
+
+
+################################################################################
+# read-only tree based class lookup
+
+cdef class PythonElementClassLookup(FallbackElementClassLookup):
+ """PythonElementClassLookup(self, fallback=None)
+ Element class lookup based on a subclass method.
+
+ This class lookup scheme allows access to the entire XML tree in
+ read-only mode. To use it, re-implement the ``lookup(self, doc,
+ root)`` method in a subclass::
+
+ from lxml import etree, pyclasslookup
+
+ class MyElementClass(etree.ElementBase):
+ honkey = True
+
+ class MyLookup(pyclasslookup.PythonElementClassLookup):
+ def lookup(self, doc, root):
+ if root.tag == "sometag":
+ return MyElementClass
+ else:
+ for child in root:
+ if child.tag == "someothertag":
+ return MyElementClass
+ # delegate to default
+ return None
+
+ If you return None from this method, the fallback will be called.
+
+ The first argument is the opaque document instance that contains
+ the Element. The second argument is a lightweight Element proxy
+ implementation that is only valid during the lookup. Do not try
+ to keep a reference to it. Once the lookup is done, the proxy
+ will be invalid.
+
+ Also, you cannot wrap such a read-only Element in an ElementTree,
+ and you must take care not to keep a reference to them outside of
+ the `lookup()` method.
+
+ Note that the API of the Element objects is not complete. It is
+ purely read-only and does not support all features of the normal
+ `lxml.etree` API (such as XPath, extended slicing or some
+ iteration methods).
+
+ See https://lxml.de/element_classes.html
+ """
+ def __cinit__(self):
+ self._lookup_function = _python_class_lookup
+
+ def lookup(self, doc, element):
+ """lookup(self, doc, element)
+
+ Override this method to implement your own lookup scheme.
+ """
+ # default implementation: finds nothing, so that
+ # _python_class_lookup() always delegates to the fallback
+ return None
+
+cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node):
+    """Lookup function of PythonElementClassLookup.
+
+    Wraps ``c_node`` in a read-only proxy, passes it to the user's
+    ``lookup(doc, proxy)`` method and validates the returned class.  If
+    the method returns None, the fallback lookup is called instead.
+    Exceptions raised by ``lookup()`` or by the validation propagate.
+    """
+    cdef PythonElementClassLookup lookup
+    cdef _ReadOnlyProxy proxy
+    lookup = <PythonElementClassLookup>state
+
+    proxy = _newReadOnlyProxy(None, c_node)
+    try:
+        cls = lookup.lookup(doc, proxy)
+    finally:
+        # The proxy is documented as valid only during the lookup, so it
+        # must be released on the error path as well; user code may have
+        # kept a reference to it before raising.
+        _freeReadOnlyProxies(proxy)
+
+    if cls is not None:
+        _validateNodeClass(c_node, cls)
+        return cls
+    return _callLookupFallback(lookup, doc, c_node)
+
+################################################################################
+# Global setup
+
+# the globally active lookup function and the state object that belongs to it;
+# both are only assigned by _setElementClassLookupFunction()
+cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS
+cdef object ELEMENT_CLASS_LOOKUP_STATE
+
+cdef void _setElementClassLookupFunction(
+ _element_class_lookup_function function, object state):
+ # Installs the global lookup function together with its state object.
+ # A NULL function selects the module default lookup, in which case the
+ # state argument is ignored and replaced by the default lookup object.
+ global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE
+ if function is NULL:
+ state = DEFAULT_ELEMENT_CLASS_LOOKUP
+ function = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function
+
+ ELEMENT_CLASS_LOOKUP_STATE = state
+ LOOKUP_ELEMENT_CLASS = function
+
+def set_element_class_lookup(ElementClassLookup lookup = None):
+    """set_element_class_lookup(lookup = None)
+
+    Set the global element class lookup method.
+
+    This is the main entry point for looking up element implementations.
+    Passing None (or a lookup without a lookup function) restores the
+    standard implementation, which uses the
+    :class:`ParserBasedElementClassLookup` to delegate to different lookup
+    schemes for each parser.
+
+    .. warning::
+
+        This should only be changed by applications, not by library packages.
+        In most cases, parser specific lookups should be preferred,
+        which can be configured via
+        :meth:`~lxml.etree.XMLParser.set_element_class_lookup`
+        (and the same for HTML parsers).
+
+        Globally replacing the element class lookup by something other than a
+        :class:`ParserBasedElementClassLookup` will prevent parser specific lookup
+        schemes from working.  Several tools rely on parser specific lookups,
+        including :mod:`lxml.html` and :mod:`lxml.objectify`.
+    """
+    if lookup is not None and lookup._lookup_function is not NULL:
+        _setElementClassLookupFunction(lookup._lookup_function, lookup)
+    else:
+        # NULL makes the setter fall back to the module default lookup
+        _setElementClassLookupFunction(NULL, None)
+
+# default setup: parser delegation
+# A single ParserBasedElementClassLookup instance serves as the module
+# default; _setElementClassLookupFunction() uses it whenever it is called
+# with a NULL function.
+cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP
+DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup()
+
+# install the default as the global lookup when this code is first executed
+set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP)