diff options
author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
---|---|---|
committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/classlookup.pxi | |
parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
download | gn-ai-master.tar.gz |
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/classlookup.pxi')
-rw-r--r-- | .venv/lib/python3.12/site-packages/lxml/classlookup.pxi | 580 |
1 files changed, 580 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi new file mode 100644 index 00000000..92d1d47a --- /dev/null +++ b/.venv/lib/python3.12/site-packages/lxml/classlookup.pxi @@ -0,0 +1,580 @@ +# Configurable Element class lookup + +################################################################################ +# Custom Element classes + +cdef public class ElementBase(_Element) [ type LxmlElementBaseType, + object LxmlElementBase ]: + """ElementBase(*children, attrib=None, nsmap=None, **_extra) + + The public Element class. All custom Element classes must inherit + from this one. To create an Element, use the `Element()` factory. + + BIG FAT WARNING: Subclasses *must not* override __init__ or + __new__ as it is absolutely undefined when these objects will be + created or destroyed. All persistent state of Elements must be + stored in the underlying XML. If you really need to initialize + the object after creation, you can implement an ``_init(self)`` + method that will be called directly after object creation. + + Subclasses of this class can be instantiated to create a new + Element. By default, the tag name will be the class name and the + namespace will be empty. You can modify this with the following + class attributes: + + * TAG - the tag name, possibly containing a namespace in Clark + notation + + * NAMESPACE - the default namespace URI, unless provided as part + of the TAG attribute. + + * HTML - flag if the class is an HTML tag, as opposed to an XML + tag. This only applies to un-namespaced tags and defaults to + false (i.e. XML). + + * PARSER - the parser that provides the configuration for the + newly created document. Providing an HTML parser here will + default to creating an HTML element. + + In user code, the latter three are commonly inherited in class + hierarchies that implement a common namespace. + """ + def __init__(self, *children, attrib=None, nsmap=None, **_extra): + """ElementBase(*children, attrib=None, nsmap=None, **_extra) + """ + cdef bint is_html = 0 + cdef _BaseParser parser + cdef _Element last_child + # don't use normal attribute access as it might be overridden + _getattr = object.__getattribute__ + try: + namespace = _utf8(_getattr(self, 'NAMESPACE')) + except AttributeError: + namespace = None + try: + ns, tag = _getNsTag(_getattr(self, 'TAG')) + if ns is not None: + namespace = ns + except AttributeError: + tag = _utf8(_getattr(_getattr(self, '__class__'), '__name__')) + if b'.' in tag: + tag = tag.split(b'.')[-1] + try: + parser = _getattr(self, 'PARSER') + except AttributeError: + parser = None + for child in children: + if isinstance(child, _Element): + parser = (<_Element>child)._doc._parser + break + if isinstance(parser, HTMLParser): + is_html = 1 + if namespace is None: + try: + is_html = _getattr(self, 'HTML') + except AttributeError: + pass + _initNewElement(self, is_html, tag, namespace, parser, + attrib, nsmap, _extra) + last_child = None + for child in children: + if _isString(child): + if last_child is None: + _setNodeText(self._c_node, + (_collectText(self._c_node.children) or '') + child) + else: + _setTailText(last_child._c_node, + (_collectText(last_child._c_node.next) or '') + child) + elif isinstance(child, _Element): + last_child = child + _appendChild(self, last_child) + elif isinstance(child, type) and issubclass(child, ElementBase): + last_child = child() + _appendChild(self, last_child) + else: + raise TypeError, f"Invalid child type: {type(child)!r}" + +cdef class CommentBase(_Comment): + """All custom Comment classes must inherit from this one. + + To create an XML Comment instance, use the ``Comment()`` factory. + + Subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or + destroyed. All persistent state of Comments must be stored in the + underlying XML. If you really need to initialize the object after + creation, you can implement an ``_init(self)`` method that will be + called after object creation. + """ + def __init__(self, text): + # copied from Comment() factory + cdef _Document doc + cdef xmlDoc* c_doc + if text is None: + text = b'' + else: + text = _utf8(text) + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + self._c_node = _createComment(c_doc, _xcstr(text)) + if self._c_node is NULL: + raise MemoryError() + tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) + _registerProxy(self, doc, self._c_node) + self._init() + +cdef class PIBase(_ProcessingInstruction): + """All custom Processing Instruction classes must inherit from this one. + + To create an XML ProcessingInstruction instance, use the ``PI()`` + factory. + + Subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or + destroyed. All persistent state of PIs must be stored in the + underlying XML. If you really need to initialize the object after + creation, you can implement an ``_init(self)`` method that will be + called after object creation. + """ + def __init__(self, target, text=None): + # copied from PI() factory + cdef _Document doc + cdef xmlDoc* c_doc + target = _utf8(target) + if text is None: + text = b'' + else: + text = _utf8(text) + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + self._c_node = _createPI(c_doc, _xcstr(target), _xcstr(text)) + if self._c_node is NULL: + raise MemoryError() + tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) + _registerProxy(self, doc, self._c_node) + self._init() + +cdef class EntityBase(_Entity): + """All custom Entity classes must inherit from this one. + + To create an XML Entity instance, use the ``Entity()`` factory. + + Subclasses *must not* override __init__ or __new__ as it is + absolutely undefined when these objects will be created or + destroyed. All persistent state of Entities must be stored in the + underlying XML. If you really need to initialize the object after + creation, you can implement an ``_init(self)`` method that will be + called after object creation. + """ + def __init__(self, name): + cdef _Document doc + cdef xmlDoc* c_doc + name_utf = _utf8(name) + c_name = _xcstr(name_utf) + if c_name[0] == c'#': + if not _characterReferenceIsValid(c_name + 1): + raise ValueError, f"Invalid character reference: '{name}'" + elif not _xmlNameIsValid(c_name): + raise ValueError, f"Invalid entity reference: '{name}'" + c_doc = _newXMLDoc() + doc = _documentFactory(c_doc, None) + self._c_node = _createEntity(c_doc, c_name) + if self._c_node is NULL: + raise MemoryError() + tree.xmlAddChild(<xmlNode*>c_doc, self._c_node) + _registerProxy(self, doc, self._c_node) + self._init() + + +cdef int _validateNodeClass(xmlNode* c_node, cls) except -1: + if c_node.type == tree.XML_ELEMENT_NODE: + expected = ElementBase + elif c_node.type == tree.XML_COMMENT_NODE: + expected = CommentBase + elif c_node.type == tree.XML_ENTITY_REF_NODE: + expected = EntityBase + elif c_node.type == tree.XML_PI_NODE: + expected = PIBase + else: + assert False, f"Unknown node type: {c_node.type}" + + if not (isinstance(cls, type) and issubclass(cls, expected)): + raise TypeError( + f"result of class lookup must be subclass of {type(expected)}, got {type(cls)}") + return 0 + + +################################################################################ +# Element class lookup + +ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) + +# class to store element class lookup functions +cdef public class ElementClassLookup [ type LxmlElementClassLookupType, + object LxmlElementClassLookup ]: + """ElementClassLookup(self) + Superclass of Element class lookups. + """ + cdef _element_class_lookup_function _lookup_function + + +cdef public class FallbackElementClassLookup(ElementClassLookup) \ + [ type LxmlFallbackElementClassLookupType, + object LxmlFallbackElementClassLookup ]: + """FallbackElementClassLookup(self, fallback=None) + + Superclass of Element class lookups with additional fallback. + """ + cdef readonly ElementClassLookup fallback + cdef _element_class_lookup_function _fallback_function + def __cinit__(self): + # fall back to default lookup + self._fallback_function = _lookupDefaultElementClass + + def __init__(self, ElementClassLookup fallback=None): + if fallback is not None: + self._setFallback(fallback) + else: + self._fallback_function = _lookupDefaultElementClass + + cdef void _setFallback(self, ElementClassLookup lookup): + """Sets the fallback scheme for this lookup method. + """ + self.fallback = lookup + self._fallback_function = lookup._lookup_function + if self._fallback_function is NULL: + self._fallback_function = _lookupDefaultElementClass + + def set_fallback(self, ElementClassLookup lookup not None): + """set_fallback(self, lookup) + + Sets the fallback scheme for this lookup method. + """ + self._setFallback(lookup) + +cdef inline object _callLookupFallback(FallbackElementClassLookup lookup, + _Document doc, xmlNode* c_node): + return lookup._fallback_function(lookup.fallback, doc, c_node) + + +################################################################################ +# default lookup scheme + +cdef class ElementDefaultClassLookup(ElementClassLookup): + """ElementDefaultClassLookup(self, element=None, comment=None, pi=None, entity=None) + Element class lookup scheme that always returns the default Element + class. + + The keyword arguments ``element``, ``comment``, ``pi`` and ``entity`` + accept the respective Element classes. + """ + cdef readonly object element_class + cdef readonly object comment_class + cdef readonly object pi_class + cdef readonly object entity_class + def __cinit__(self): + self._lookup_function = _lookupDefaultElementClass + + def __init__(self, element=None, comment=None, pi=None, entity=None): + if element is None: + self.element_class = _Element + elif issubclass(element, ElementBase): + self.element_class = element + else: + raise TypeError, "element class must be subclass of ElementBase" + + if comment is None: + self.comment_class = _Comment + elif issubclass(comment, CommentBase): + self.comment_class = comment + else: + raise TypeError, "comment class must be subclass of CommentBase" + + if entity is None: + self.entity_class = _Entity + elif issubclass(entity, EntityBase): + self.entity_class = entity + else: + raise TypeError, "Entity class must be subclass of EntityBase" + + if pi is None: + self.pi_class = None # special case, see below + elif issubclass(pi, PIBase): + self.pi_class = pi + else: + raise TypeError, "PI class must be subclass of PIBase" + +cdef object _lookupDefaultElementClass(state, _Document _doc, xmlNode* c_node): + "Trivial class lookup function that always returns the default class." + if c_node.type == tree.XML_ELEMENT_NODE: + if state is not None: + return (<ElementDefaultClassLookup>state).element_class + else: + return _Element + elif c_node.type == tree.XML_COMMENT_NODE: + if state is not None: + return (<ElementDefaultClassLookup>state).comment_class + else: + return _Comment + elif c_node.type == tree.XML_ENTITY_REF_NODE: + if state is not None: + return (<ElementDefaultClassLookup>state).entity_class + else: + return _Entity + elif c_node.type == tree.XML_PI_NODE: + if state is None or (<ElementDefaultClassLookup>state).pi_class is None: + # special case XSLT-PI + if c_node.name is not NULL and c_node.content is not NULL: + if tree.xmlStrcmp(c_node.name, <unsigned char*>"xml-stylesheet") == 0: + if tree.xmlStrstr(c_node.content, <unsigned char*>"text/xsl") is not NULL or \ + tree.xmlStrstr(c_node.content, <unsigned char*>"text/xml") is not NULL: + return _XSLTProcessingInstruction + return _ProcessingInstruction + else: + return (<ElementDefaultClassLookup>state).pi_class + else: + assert False, f"Unknown node type: {c_node.type}" + + +################################################################################ +# attribute based lookup scheme + +cdef class AttributeBasedElementClassLookup(FallbackElementClassLookup): + """AttributeBasedElementClassLookup(self, attribute_name, class_mapping, fallback=None) + Checks an attribute of an Element and looks up the value in a + class dictionary. + + Arguments: + - attribute name - '{ns}name' style string + - class mapping - Python dict mapping attribute values to Element classes + - fallback - optional fallback lookup mechanism + + A None key in the class mapping will be checked if the attribute is + missing. + """ + cdef object _class_mapping + cdef tuple _pytag + cdef const_xmlChar* _c_ns + cdef const_xmlChar* _c_name + def __cinit__(self): + self._lookup_function = _attribute_class_lookup + + def __init__(self, attribute_name, class_mapping, + ElementClassLookup fallback=None): + self._pytag = _getNsTag(attribute_name) + ns, name = self._pytag + if ns is None: + self._c_ns = NULL + else: + self._c_ns = _xcstr(ns) + self._c_name = _xcstr(name) + self._class_mapping = dict(class_mapping) + + FallbackElementClassLookup.__init__(self, fallback) + +cdef object _attribute_class_lookup(state, _Document doc, xmlNode* c_node): + cdef AttributeBasedElementClassLookup lookup + cdef python.PyObject* dict_result + + lookup = <AttributeBasedElementClassLookup>state + if c_node.type == tree.XML_ELEMENT_NODE: + value = _attributeValueFromNsName( + c_node, lookup._c_ns, lookup._c_name) + dict_result = python.PyDict_GetItem(lookup._class_mapping, value) + if dict_result is not NULL: + cls = <object>dict_result + _validateNodeClass(c_node, cls) + return cls + return _callLookupFallback(lookup, doc, c_node) + + +################################################################################ +# per-parser lookup scheme + +cdef class ParserBasedElementClassLookup(FallbackElementClassLookup): + """ParserBasedElementClassLookup(self, fallback=None) + Element class lookup based on the XML parser. + """ + def __cinit__(self): + self._lookup_function = _parser_class_lookup + +cdef object _parser_class_lookup(state, _Document doc, xmlNode* c_node): + if doc._parser._class_lookup is not None: + return doc._parser._class_lookup._lookup_function( + doc._parser._class_lookup, doc, c_node) + return _callLookupFallback(<FallbackElementClassLookup>state, doc, c_node) + + +################################################################################ +# custom class lookup based on node type, namespace, name + +cdef class CustomElementClassLookup(FallbackElementClassLookup): + """CustomElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. + + You can inherit from this class and override the method:: + + lookup(self, type, doc, namespace, name) + + to lookup the element class for a node. Arguments of the method: + * type: one of 'element', 'comment', 'PI', 'entity' + * doc: document that the node is in + * namespace: namespace URI of the node (or None for comments/PIs/entities) + * name: name of the element/entity, None for comments, target for PIs + + If you return None from this method, the fallback will be called. + """ + def __cinit__(self): + self._lookup_function = _custom_class_lookup + + def lookup(self, type, doc, namespace, name): + "lookup(self, type, doc, namespace, name)" + return None + +cdef object _custom_class_lookup(state, _Document doc, xmlNode* c_node): + cdef CustomElementClassLookup lookup + + lookup = <CustomElementClassLookup>state + + if c_node.type == tree.XML_ELEMENT_NODE: + element_type = "element" + elif c_node.type == tree.XML_COMMENT_NODE: + element_type = "comment" + elif c_node.type == tree.XML_PI_NODE: + element_type = "PI" + elif c_node.type == tree.XML_ENTITY_REF_NODE: + element_type = "entity" + else: + element_type = "element" + if c_node.name is NULL: + name = None + else: + name = funicode(c_node.name) + c_str = tree._getNs(c_node) + ns = funicode(c_str) if c_str is not NULL else None + + cls = lookup.lookup(element_type, doc, ns, name) + if cls is not None: + _validateNodeClass(c_node, cls) + return cls + return _callLookupFallback(lookup, doc, c_node) + + +################################################################################ +# read-only tree based class lookup + +cdef class PythonElementClassLookup(FallbackElementClassLookup): + """PythonElementClassLookup(self, fallback=None) + Element class lookup based on a subclass method. + + This class lookup scheme allows access to the entire XML tree in + read-only mode. To use it, re-implement the ``lookup(self, doc, + root)`` method in a subclass:: + + from lxml import etree, pyclasslookup + + class MyElementClass(etree.ElementBase): + honkey = True + + class MyLookup(pyclasslookup.PythonElementClassLookup): + def lookup(self, doc, root): + if root.tag == "sometag": + return MyElementClass + else: + for child in root: + if child.tag == "someothertag": + return MyElementClass + # delegate to default + return None + + If you return None from this method, the fallback will be called. + + The first argument is the opaque document instance that contains + the Element. The second argument is a lightweight Element proxy + implementation that is only valid during the lookup. Do not try + to keep a reference to it. Once the lookup is done, the proxy + will be invalid. + + Also, you cannot wrap such a read-only Element in an ElementTree, + and you must take care not to keep a reference to them outside of + the `lookup()` method. + + Note that the API of the Element objects is not complete. It is + purely read-only and does not support all features of the normal + `lxml.etree` API (such as XPath, extended slicing or some + iteration methods). + + See https://lxml.de/element_classes.html + """ + def __cinit__(self): + self._lookup_function = _python_class_lookup + + def lookup(self, doc, element): + """lookup(self, doc, element) + + Override this method to implement your own lookup scheme. + """ + return None + +cdef object _python_class_lookup(state, _Document doc, tree.xmlNode* c_node): + cdef PythonElementClassLookup lookup + cdef _ReadOnlyProxy proxy + lookup = <PythonElementClassLookup>state + + proxy = _newReadOnlyProxy(None, c_node) + cls = lookup.lookup(doc, proxy) + _freeReadOnlyProxies(proxy) + + if cls is not None: + _validateNodeClass(c_node, cls) + return cls + return _callLookupFallback(lookup, doc, c_node) + +################################################################################ +# Global setup + +cdef _element_class_lookup_function LOOKUP_ELEMENT_CLASS +cdef object ELEMENT_CLASS_LOOKUP_STATE + +cdef void _setElementClassLookupFunction( + _element_class_lookup_function function, object state): + global LOOKUP_ELEMENT_CLASS, ELEMENT_CLASS_LOOKUP_STATE + if function is NULL: + state = DEFAULT_ELEMENT_CLASS_LOOKUP + function = DEFAULT_ELEMENT_CLASS_LOOKUP._lookup_function + + ELEMENT_CLASS_LOOKUP_STATE = state + LOOKUP_ELEMENT_CLASS = function + +def set_element_class_lookup(ElementClassLookup lookup = None): + """set_element_class_lookup(lookup = None) + + Set the global element class lookup method. + + This defines the main entry point for looking up element implementations. + The standard implementation uses the :class:`ParserBasedElementClassLookup` + to delegate to different lookup schemes for each parser. + + .. warning:: + + This should only be changed by applications, not by library packages. + In most cases, parser specific lookups should be preferred, + which can be configured via + :meth:`~lxml.etree.XMLParser.set_element_class_lookup` + (and the same for HTML parsers). + + Globally replacing the element class lookup by something other than a + :class:`ParserBasedElementClassLookup` will prevent parser specific lookup + schemes from working. Several tools rely on parser specific lookups, + including :mod:`lxml.html` and :mod:`lxml.objectify`. + """ + if lookup is None or lookup._lookup_function is NULL: + _setElementClassLookupFunction(NULL, None) + else: + _setElementClassLookupFunction(lookup._lookup_function, lookup) + +# default setup: parser delegation +cdef ParserBasedElementClassLookup DEFAULT_ELEMENT_CLASS_LOOKUP +DEFAULT_ELEMENT_CLASS_LOOKUP = ParserBasedElementClassLookup() + +set_element_class_lookup(DEFAULT_ELEMENT_CLASS_LOOKUP) |