aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi
diff options
context:
space:
mode:
authorS. Solomon Darnell2025-03-28 21:52:21 -0500
committerS. Solomon Darnell2025-03-28 21:52:21 -0500
commit4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
treeee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/lxml/saxparser.pxi
parentcc961e04ba734dd72309fb548a2f97d67d578813 (diff)
downloadgn-ai-master.tar.gz
two version of R2R are hereHEADmaster
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/saxparser.pxi')
-rw-r--r--.venv/lib/python3.12/site-packages/lxml/saxparser.pxi875
1 files changed, 875 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi b/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi
new file mode 100644
index 00000000..dc03df9a
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/saxparser.pxi
@@ -0,0 +1,875 @@
+# SAX-like interfaces
+
+class XMLSyntaxAssertionError(XMLSyntaxError, AssertionError):
+ """
+ An XMLSyntaxError that additionally inherits from AssertionError for
+ ElementTree / backwards compatibility reasons.
+
+ This class may get replaced by a plain XMLSyntaxError in a future version.
+ """
+ def __init__(self, message):
+ XMLSyntaxError.__init__(self, message, None, 0, 1)
+
+
+ctypedef enum _SaxParserEvents:
+ SAX_EVENT_START = 1 << 0
+ SAX_EVENT_END = 1 << 1
+ SAX_EVENT_DATA = 1 << 2
+ SAX_EVENT_DOCTYPE = 1 << 3
+ SAX_EVENT_PI = 1 << 4
+ SAX_EVENT_COMMENT = 1 << 5
+ SAX_EVENT_START_NS = 1 << 6
+ SAX_EVENT_END_NS = 1 << 7
+
+ctypedef enum _ParseEventFilter:
+ PARSE_EVENT_FILTER_START = 1 << 0
+ PARSE_EVENT_FILTER_END = 1 << 1
+ PARSE_EVENT_FILTER_START_NS = 1 << 2
+ PARSE_EVENT_FILTER_END_NS = 1 << 3
+ PARSE_EVENT_FILTER_COMMENT = 1 << 4
+ PARSE_EVENT_FILTER_PI = 1 << 5
+
+
+cdef int _buildParseEventFilter(events) except -1:
+ cdef int event_filter = 0
+ for event in events:
+ if event == 'start':
+ event_filter |= PARSE_EVENT_FILTER_START
+ elif event == 'end':
+ event_filter |= PARSE_EVENT_FILTER_END
+ elif event == 'start-ns':
+ event_filter |= PARSE_EVENT_FILTER_START_NS
+ elif event == 'end-ns':
+ event_filter |= PARSE_EVENT_FILTER_END_NS
+ elif event == 'comment':
+ event_filter |= PARSE_EVENT_FILTER_COMMENT
+ elif event == 'pi':
+ event_filter |= PARSE_EVENT_FILTER_PI
+ else:
+ raise ValueError, f"invalid event name '{event}'"
+ return event_filter
+
+
+cdef class _SaxParserTarget:
+ cdef int _sax_event_filter
+
+ cdef _handleSaxStart(self, tag, attrib, nsmap):
+ return None
+ cdef _handleSaxEnd(self, tag):
+ return None
+ cdef int _handleSaxData(self, data) except -1:
+ return 0
+ cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
+ return 0
+ cdef _handleSaxPi(self, target, data):
+ return None
+ cdef _handleSaxComment(self, comment):
+ return None
+ cdef _handleSaxStartNs(self, prefix, uri):
+ return None
+ cdef _handleSaxEndNs(self, prefix):
+ return None
+
+
+#@cython.final
+@cython.internal
+@cython.no_gc_clear # Required because parent class uses it - Cython bug.
+cdef class _SaxParserContext(_ParserContext):
+ """This class maps SAX2 events to parser target events.
+ """
+ cdef _SaxParserTarget _target
+ cdef _BaseParser _parser
+ cdef xmlparser.startElementNsSAX2Func _origSaxStart
+ cdef xmlparser.endElementNsSAX2Func _origSaxEnd
+ cdef xmlparser.startElementSAXFunc _origSaxStartNoNs
+ cdef xmlparser.endElementSAXFunc _origSaxEndNoNs
+ cdef xmlparser.charactersSAXFunc _origSaxData
+ cdef xmlparser.cdataBlockSAXFunc _origSaxCData
+ cdef xmlparser.internalSubsetSAXFunc _origSaxDoctype
+ cdef xmlparser.commentSAXFunc _origSaxComment
+ cdef xmlparser.processingInstructionSAXFunc _origSaxPI
+ cdef xmlparser.startDocumentSAXFunc _origSaxStartDocument
+
+ # for event collecting
+ cdef int _event_filter
+ cdef list _ns_stack
+ cdef list _node_stack
+ cdef _ParseEventsIterator events_iterator
+
+ # for iterparse
+ cdef _Element _root
+ cdef _MultiTagMatcher _matcher
+
+ def __cinit__(self, _BaseParser parser):
+ self._ns_stack = []
+ self._node_stack = []
+ self._parser = parser
+ self.events_iterator = _ParseEventsIterator()
+
+ cdef void _setSaxParserTarget(self, _SaxParserTarget target) noexcept:
+ self._target = target
+
+ cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
+ _ParserContext._initParserContext(self, c_ctxt)
+ if self._target is not None:
+ self._connectTarget(c_ctxt)
+ elif self._event_filter:
+ self._connectEvents(c_ctxt)
+
+ cdef void _connectTarget(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
+ """Wrap original SAX2 callbacks to call into parser target.
+ """
+ sax = c_ctxt.sax
+ self._origSaxStart = sax.startElementNs = NULL
+ self._origSaxStartNoNs = sax.startElement = NULL
+ if self._target._sax_event_filter & (SAX_EVENT_START |
+ SAX_EVENT_START_NS |
+ SAX_EVENT_END_NS):
+ # intercept => overwrite orig callback
+ # FIXME: also intercept on when collecting END events
+ if sax.initialized == xmlparser.XML_SAX2_MAGIC:
+ sax.startElementNs = _handleSaxTargetStart
+ if self._target._sax_event_filter & SAX_EVENT_START:
+ sax.startElement = _handleSaxTargetStartNoNs
+
+ self._origSaxEnd = sax.endElementNs = NULL
+ self._origSaxEndNoNs = sax.endElement = NULL
+ if self._target._sax_event_filter & (SAX_EVENT_END |
+ SAX_EVENT_END_NS):
+ if sax.initialized == xmlparser.XML_SAX2_MAGIC:
+ sax.endElementNs = _handleSaxEnd
+ if self._target._sax_event_filter & SAX_EVENT_END:
+ sax.endElement = _handleSaxEndNoNs
+
+ self._origSaxData = sax.characters = sax.cdataBlock = NULL
+ if self._target._sax_event_filter & SAX_EVENT_DATA:
+ sax.characters = sax.cdataBlock = _handleSaxData
+
+ # doctype propagation is always required for entity replacement
+ self._origSaxDoctype = sax.internalSubset
+ if self._target._sax_event_filter & SAX_EVENT_DOCTYPE:
+ sax.internalSubset = _handleSaxTargetDoctype
+
+ self._origSaxPI = sax.processingInstruction = NULL
+ if self._target._sax_event_filter & SAX_EVENT_PI:
+ sax.processingInstruction = _handleSaxTargetPI
+
+ self._origSaxComment = sax.comment = NULL
+ if self._target._sax_event_filter & SAX_EVENT_COMMENT:
+ sax.comment = _handleSaxTargetComment
+
+ # enforce entity replacement
+ sax.reference = NULL
+ c_ctxt.replaceEntities = 1
+
+ cdef void _connectEvents(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
+ """Wrap original SAX2 callbacks to collect parse events without parser target.
+ """
+ sax = c_ctxt.sax
+ self._origSaxStartDocument = sax.startDocument
+ sax.startDocument = _handleSaxStartDocument
+
+ # only override "start" event handler if needed
+ self._origSaxStart = sax.startElementNs
+ if self._event_filter == 0 or c_ctxt.html or \
+ self._event_filter & (PARSE_EVENT_FILTER_START |
+ PARSE_EVENT_FILTER_END |
+ PARSE_EVENT_FILTER_START_NS |
+ PARSE_EVENT_FILTER_END_NS):
+ sax.startElementNs = <xmlparser.startElementNsSAX2Func>_handleSaxStart
+
+ self._origSaxStartNoNs = sax.startElement
+ if self._event_filter == 0 or c_ctxt.html or \
+ self._event_filter & (PARSE_EVENT_FILTER_START |
+ PARSE_EVENT_FILTER_END):
+ sax.startElement = <xmlparser.startElementSAXFunc>_handleSaxStartNoNs
+
+ # only override "end" event handler if needed
+ self._origSaxEnd = sax.endElementNs
+ if self._event_filter == 0 or \
+ self._event_filter & (PARSE_EVENT_FILTER_END |
+ PARSE_EVENT_FILTER_END_NS):
+ sax.endElementNs = <xmlparser.endElementNsSAX2Func>_handleSaxEnd
+
+ self._origSaxEndNoNs = sax.endElement
+ if self._event_filter == 0 or \
+ self._event_filter & PARSE_EVENT_FILTER_END:
+ sax.endElement = <xmlparser.endElementSAXFunc>_handleSaxEndNoNs
+
+ self._origSaxComment = sax.comment
+ if self._event_filter & PARSE_EVENT_FILTER_COMMENT:
+ sax.comment = <xmlparser.commentSAXFunc>_handleSaxComment
+
+ self._origSaxPI = sax.processingInstruction
+ if self._event_filter & PARSE_EVENT_FILTER_PI:
+ sax.processingInstruction = <xmlparser.processingInstructionSAXFunc>_handleSaxPIEvent
+
+ cdef _setEventFilter(self, events, tag):
+ self._event_filter = _buildParseEventFilter(events)
+ if not self._event_filter or tag is None or tag == '*':
+ self._matcher = None
+ else:
+ self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)
+
+ cdef int startDocument(self, xmlDoc* c_doc) except -1:
+ try:
+ self._doc = _documentFactory(c_doc, self._parser)
+ finally:
+ self._parser = None # clear circular reference ASAP
+ if self._matcher is not None:
+ self._matcher.cacheTags(self._doc, True) # force entry in libxml2 dict
+ return 0
+
+ cdef int pushEvent(self, event, xmlNode* c_node) except -1:
+ cdef _Element root
+ if self._root is None:
+ root = self._doc.getroot()
+ if root is not None and root._c_node.type == tree.XML_ELEMENT_NODE:
+ self._root = root
+ node = _elementFactory(self._doc, c_node)
+ self.events_iterator._events.append( (event, node) )
+ return 0
+
+ cdef int flushEvents(self) except -1:
+ events = self.events_iterator._events
+ while self._node_stack:
+ events.append( ('end', self._node_stack.pop()) )
+ _pushSaxNsEndEvents(self)
+ while self._ns_stack:
+ _pushSaxNsEndEvents(self)
+
+ cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
+ if c_ctxt.errNo == xmlerror.XML_ERR_OK:
+ c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
+ # stop parsing immediately
+ c_ctxt.wellFormed = 0
+ c_ctxt.disableSAX = 1
+ c_ctxt.instate = xmlparser.XML_PARSER_EOF
+ self._store_raised()
+
+
+@cython.final
+@cython.internal
+cdef class _ParseEventsIterator:
+ """A reusable parse events iterator"""
+ cdef list _events
+ cdef int _event_index
+
+ def __cinit__(self):
+ self._events = []
+ self._event_index = 0
+
+ def __iter__(self):
+ return self
+
+ def __next__(self):
+ cdef int event_index = self._event_index
+ events = self._events
+ if event_index >= 2**10 or event_index * 2 >= len(events):
+ if event_index:
+ # clean up from time to time
+ del events[:event_index]
+ self._event_index = event_index = 0
+ if event_index >= len(events):
+ raise StopIteration
+ item = events[event_index]
+ self._event_index = event_index + 1
+ return item
+
+
+cdef list _build_prefix_uri_list(_SaxParserContext context, int c_nb_namespaces,
+ const_xmlChar** c_namespaces):
+ "Build [(prefix, uri)] list of declared namespaces."
+ cdef int i
+ namespaces = []
+ for i in xrange(c_nb_namespaces):
+ namespaces.append((funicodeOrEmpty(c_namespaces[0]), funicode(c_namespaces[1])))
+ c_namespaces += 2
+ return namespaces
+
+
+cdef void _handleSaxStart(
+ void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
+ const_xmlChar* c_namespace, int c_nb_namespaces,
+ const_xmlChar** c_namespaces,
+ int c_nb_attributes, int c_nb_defaulted,
+ const_xmlChar** c_attributes) noexcept with gil:
+ cdef int i
+ cdef size_t c_len
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ cdef int event_filter = context._event_filter
+ try:
+ if (c_nb_namespaces and
+ event_filter & (PARSE_EVENT_FILTER_START_NS |
+ PARSE_EVENT_FILTER_END_NS)):
+ declared_namespaces = _build_prefix_uri_list(
+ context, c_nb_namespaces, c_namespaces)
+ if event_filter & PARSE_EVENT_FILTER_START_NS:
+ for prefix_uri_tuple in declared_namespaces:
+ context.events_iterator._events.append(("start-ns", prefix_uri_tuple))
+ else:
+ declared_namespaces = None
+
+ context._origSaxStart(c_ctxt, c_localname, c_prefix, c_namespace,
+ c_nb_namespaces, c_namespaces, c_nb_attributes,
+ c_nb_defaulted, c_attributes)
+ if c_ctxt.html:
+ _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
+ # The HTML parser in libxml2 reports the missing opening tags when it finds
+ # misplaced ones, but with tag names from C string constants that ignore the
+ # parser dict. Thus, we need to intern the name ourselves.
+ c_localname = tree.xmlDictLookup(c_ctxt.dict, c_localname, -1)
+ if c_localname is NULL:
+ raise MemoryError()
+
+ if event_filter & PARSE_EVENT_FILTER_END_NS:
+ context._ns_stack.append(declared_namespaces)
+ if event_filter & (PARSE_EVENT_FILTER_END |
+ PARSE_EVENT_FILTER_START):
+ _pushSaxStartEvent(context, c_ctxt, c_namespace, c_localname, None)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxTargetStart(
+ void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
+ const_xmlChar* c_namespace, int c_nb_namespaces,
+ const_xmlChar** c_namespaces,
+ int c_nb_attributes, int c_nb_defaulted,
+ const_xmlChar** c_attributes) noexcept with gil:
+ cdef int i
+ cdef size_t c_len
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+
+ cdef int event_filter = context._event_filter
+ cdef int sax_event_filter = context._target._sax_event_filter
+ try:
+ if c_nb_namespaces:
+ declared_namespaces = _build_prefix_uri_list(
+ context, c_nb_namespaces, c_namespaces)
+
+ if event_filter & PARSE_EVENT_FILTER_START_NS:
+ for prefix_uri_tuple in declared_namespaces:
+ context.events_iterator._events.append(("start-ns", prefix_uri_tuple))
+
+ if sax_event_filter & SAX_EVENT_START_NS:
+ for prefix, uri in declared_namespaces:
+ context._target._handleSaxStartNs(prefix, uri)
+ else:
+ declared_namespaces = None
+
+ if sax_event_filter & SAX_EVENT_START:
+ if c_nb_defaulted > 0:
+ # only add default attributes if we asked for them
+ if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
+ c_nb_attributes -= c_nb_defaulted
+ if c_nb_attributes == 0:
+ attrib = IMMUTABLE_EMPTY_MAPPING
+ else:
+ attrib = {}
+ for i in xrange(c_nb_attributes):
+ name = _namespacedNameFromNsName(
+ c_attributes[2], c_attributes[0])
+ if c_attributes[3] is NULL:
+ value = ''
+ else:
+ c_len = c_attributes[4] - c_attributes[3]
+ value = c_attributes[3][:c_len].decode('utf8')
+ attrib[name] = value
+ c_attributes += 5
+
+ nsmap = dict(declared_namespaces) if c_nb_namespaces else IMMUTABLE_EMPTY_MAPPING
+
+ element = _callTargetSaxStart(
+ context, c_ctxt,
+ _namespacedNameFromNsName(c_namespace, c_localname),
+ attrib, nsmap)
+ else:
+ element = None
+
+ if (event_filter & PARSE_EVENT_FILTER_END_NS or
+ sax_event_filter & SAX_EVENT_END_NS):
+ context._ns_stack.append(declared_namespaces)
+ if event_filter & (PARSE_EVENT_FILTER_END |
+ PARSE_EVENT_FILTER_START):
+ _pushSaxStartEvent(context, c_ctxt, c_namespace,
+ c_localname, element)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name,
+ const_xmlChar** c_attributes) noexcept with gil:
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ context._origSaxStartNoNs(c_ctxt, c_name, c_attributes)
+ if c_ctxt.html:
+ _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
+ # The HTML parser in libxml2 reports the missing opening tags when it finds
+ # misplaced ones, but with tag names from C string constants that ignore the
+ # parser dict. Thus, we need to intern the name ourselves.
+ c_name = tree.xmlDictLookup(c_ctxt.dict, c_name, -1)
+ if c_name is NULL:
+ raise MemoryError()
+ if context._event_filter & (PARSE_EVENT_FILTER_END |
+ PARSE_EVENT_FILTER_START):
+ _pushSaxStartEvent(context, c_ctxt, NULL, c_name, None)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxTargetStartNoNs(void* ctxt, const_xmlChar* c_name,
+ const_xmlChar** c_attributes) noexcept with gil:
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ if c_attributes is NULL:
+ attrib = IMMUTABLE_EMPTY_MAPPING
+ else:
+ attrib = {}
+ while c_attributes[0] is not NULL:
+ name = funicode(c_attributes[0])
+ attrib[name] = funicodeOrEmpty(c_attributes[1])
+ c_attributes += 2
+ element = _callTargetSaxStart(
+ context, c_ctxt, funicode(c_name),
+ attrib, IMMUTABLE_EMPTY_MAPPING)
+ if context._event_filter & (PARSE_EVENT_FILTER_END |
+ PARSE_EVENT_FILTER_START):
+ _pushSaxStartEvent(context, c_ctxt, NULL, c_name, element)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef _callTargetSaxStart(_SaxParserContext context,
+ xmlparser.xmlParserCtxt* c_ctxt,
+ tag, attrib, nsmap):
+ element = context._target._handleSaxStart(tag, attrib, nsmap)
+ if element is not None and c_ctxt.input is not NULL:
+ if isinstance(element, _Element):
+ (<_Element>element)._c_node.line = (
+ <unsigned short>c_ctxt.input.line
+ if c_ctxt.input.line < 65535 else 65535)
+ return element
+
+
+cdef int _pushSaxStartEvent(_SaxParserContext context,
+ xmlparser.xmlParserCtxt* c_ctxt,
+ const_xmlChar* c_href,
+ const_xmlChar* c_name, node) except -1:
+ if (context._matcher is None or
+ context._matcher.matchesNsTag(c_href, c_name)):
+ if node is None and context._target is None:
+ assert context._doc is not None
+ node = _elementFactory(context._doc, c_ctxt.node)
+ if context._event_filter & PARSE_EVENT_FILTER_START:
+ context.events_iterator._events.append(('start', node))
+ if (context._target is None and
+ context._event_filter & PARSE_EVENT_FILTER_END):
+ context._node_stack.append(node)
+ return 0
+
+
+cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname,
+ const_xmlChar* c_prefix,
+ const_xmlChar* c_namespace) noexcept with gil:
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ if context._target is not None:
+ if context._target._sax_event_filter & SAX_EVENT_END:
+ node = context._target._handleSaxEnd(
+ _namespacedNameFromNsName(c_namespace, c_localname))
+ else:
+ node = None
+ else:
+ context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace)
+ node = None
+ _pushSaxEndEvent(context, c_namespace, c_localname, node)
+ _pushSaxNsEndEvents(context)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxEndNoNs(void* ctxt, const_xmlChar* c_name) noexcept with gil:
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ if context._target is not None:
+ node = context._target._handleSaxEnd(funicode(c_name))
+ else:
+ context._origSaxEndNoNs(c_ctxt, c_name)
+ node = None
+ _pushSaxEndEvent(context, NULL, c_name, node)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef int _pushSaxNsEndEvents(_SaxParserContext context) except -1:
+ cdef bint build_events = context._event_filter & PARSE_EVENT_FILTER_END_NS
+ cdef bint call_target = (
+ context._target is not None
+ and context._target._sax_event_filter & SAX_EVENT_END_NS)
+ if not build_events and not call_target:
+ return 0
+
+ cdef list declared_namespaces = context._ns_stack.pop()
+ if declared_namespaces is None:
+ return 0
+
+ cdef tuple prefix_uri
+ for prefix_uri in reversed(declared_namespaces):
+ if call_target:
+ context._target._handleSaxEndNs(prefix_uri[0])
+ if build_events:
+ context.events_iterator._events.append(('end-ns', None))
+
+ return 0
+
+
+cdef int _pushSaxEndEvent(_SaxParserContext context,
+ const_xmlChar* c_href,
+ const_xmlChar* c_name, node) except -1:
+ if context._event_filter & PARSE_EVENT_FILTER_END:
+ if (context._matcher is None or
+ context._matcher.matchesNsTag(c_href, c_name)):
+ if context._target is None:
+ node = context._node_stack.pop()
+ context.events_iterator._events.append(('end', node))
+ return 0
+
+
+cdef void _handleSaxData(void* ctxt, const_xmlChar* c_data, int data_len) noexcept with gil:
+ # can only be called if parsing with a target
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ context._target._handleSaxData(
+ c_data[:data_len].decode('utf8'))
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name,
+ const_xmlChar* c_public,
+ const_xmlChar* c_system) noexcept with gil:
+ # can only be called if parsing with a target
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ context._target._handleSaxDoctype(
+ funicodeOrNone(c_name),
+ funicodeOrNone(c_public),
+ funicodeOrNone(c_system))
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxStartDocument(void* ctxt) noexcept with gil:
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ context._origSaxStartDocument(ctxt)
+ c_doc = c_ctxt.myDoc
+ try:
+ context.startDocument(c_doc)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxTargetPI(void* ctxt, const_xmlChar* c_target,
+ const_xmlChar* c_data) noexcept with gil:
+ # can only be called if parsing with a target
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ pi = context._target._handleSaxPi(
+ funicodeOrNone(c_target),
+ funicodeOrEmpty(c_data))
+ if context._event_filter & PARSE_EVENT_FILTER_PI:
+ context.events_iterator._events.append(('pi', pi))
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target,
+ const_xmlChar* data) noexcept with gil:
+ # can only be called when collecting pi events
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ context._origSaxPI(ctxt, target, data)
+ c_node = _findLastEventNode(c_ctxt)
+ if c_node is NULL:
+ return
+ try:
+ context.pushEvent('pi', c_node)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) noexcept with gil:
+ # can only be called if parsing with a target
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ try:
+ comment = context._target._handleSaxComment(funicodeOrEmpty(c_data))
+ if context._event_filter & PARSE_EVENT_FILTER_COMMENT:
+ context.events_iterator._events.append(('comment', comment))
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef void _handleSaxComment(void* ctxt, const_xmlChar* text) noexcept with gil:
+ # can only be called when collecting comment events
+ c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
+ if c_ctxt._private is NULL or c_ctxt.disableSAX:
+ return
+ context = <_SaxParserContext>c_ctxt._private
+ context._origSaxComment(ctxt, text)
+ c_node = _findLastEventNode(c_ctxt)
+ if c_node is NULL:
+ return
+ try:
+ context.pushEvent('comment', c_node)
+ except:
+ context._handleSaxException(c_ctxt)
+ finally:
+ return # swallow any further exceptions
+
+
+cdef inline xmlNode* _findLastEventNode(xmlparser.xmlParserCtxt* c_ctxt):
+ # this mimics what libxml2 creates for comments/PIs
+ if c_ctxt.inSubset == 1:
+ return c_ctxt.myDoc.intSubset.last
+ elif c_ctxt.inSubset == 2:
+ return c_ctxt.myDoc.extSubset.last
+ elif c_ctxt.node is NULL:
+ return c_ctxt.myDoc.last
+ elif c_ctxt.node.type == tree.XML_ELEMENT_NODE:
+ return c_ctxt.node.last
+ else:
+ return c_ctxt.node.next
+
+
+############################################################
+## ET compatible XML tree builder
+############################################################
+
+cdef class TreeBuilder(_SaxParserTarget):
+ """TreeBuilder(self, element_factory=None, parser=None,
+ comment_factory=None, pi_factory=None,
+ insert_comments=True, insert_pis=True)
+
+ Parser target that builds a tree from parse event callbacks.
+
+ The factory arguments can be used to influence the creation of
+ elements, comments and processing instructions.
+
+ By default, comments and processing instructions are inserted into
+ the tree, but they can be ignored by passing the respective flags.
+
+ The final tree is returned by the ``close()`` method.
+ """
+ cdef _BaseParser _parser
+ cdef object _factory
+ cdef object _comment_factory
+ cdef object _pi_factory
+ cdef list _data
+ cdef list _element_stack
+ cdef object _element_stack_pop
+ cdef _Element _last # may be None
+ cdef bint _in_tail
+ cdef bint _insert_comments
+ cdef bint _insert_pis
+
+ def __init__(self, *, element_factory=None, parser=None,
+ comment_factory=None, pi_factory=None,
+ bint insert_comments=True, bint insert_pis=True):
+ self._sax_event_filter = \
+ SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA | \
+ SAX_EVENT_PI | SAX_EVENT_COMMENT
+ self._data = [] # data collector
+ self._element_stack = [] # element stack
+ self._element_stack_pop = self._element_stack.pop
+ self._last = None # last element
+ self._in_tail = 0 # true if we're after an end tag
+ self._factory = element_factory
+ self._comment_factory = comment_factory if comment_factory is not None else Comment
+ self._pi_factory = pi_factory if pi_factory is not None else ProcessingInstruction
+ self._insert_comments = insert_comments
+ self._insert_pis = insert_pis
+ self._parser = parser
+
+ @cython.final
+ cdef int _flush(self) except -1:
+ if self._data:
+ if self._last is not None:
+ text = "".join(self._data)
+ if self._in_tail:
+ assert self._last.tail is None, "internal error (tail)"
+ self._last.tail = text
+ else:
+ assert self._last.text is None, "internal error (text)"
+ self._last.text = text
+ del self._data[:]
+ return 0
+
+ # internal SAX event handlers
+
+ @cython.final
+ cdef _handleSaxStart(self, tag, attrib, nsmap):
+ self._flush()
+ if self._factory is not None:
+ self._last = self._factory(tag, attrib)
+ if self._element_stack:
+ _appendChild(self._element_stack[-1], self._last)
+ elif self._element_stack:
+ self._last = _makeSubElement(
+ self._element_stack[-1], tag, None, None, attrib, nsmap, None)
+ else:
+ self._last = _makeElement(
+ tag, NULL, None, self._parser, None, None, attrib, nsmap, None)
+ self._element_stack.append(self._last)
+ self._in_tail = 0
+ return self._last
+
+ @cython.final
+ cdef _handleSaxEnd(self, tag):
+ self._flush()
+ self._last = self._element_stack_pop()
+ self._in_tail = 1
+ return self._last
+
+ @cython.final
+ cdef int _handleSaxData(self, data) except -1:
+ self._data.append(data)
+
+ @cython.final
+ cdef _handleSaxPi(self, target, data):
+ elem = self._pi_factory(target, data)
+ if self._insert_pis:
+ self._flush()
+ self._last = elem
+ if self._element_stack:
+ _appendChild(self._element_stack[-1], self._last)
+ self._in_tail = 1
+ return self._last
+
+ @cython.final
+ cdef _handleSaxComment(self, comment):
+ elem = self._comment_factory(comment)
+ if self._insert_comments:
+ self._flush()
+ self._last = elem
+ if self._element_stack:
+ _appendChild(self._element_stack[-1], self._last)
+ self._in_tail = 1
+ return elem
+
+ # Python level event handlers
+
+ def close(self):
+ """close(self)
+
+ Flushes the builder buffers, and returns the toplevel document
+ element. Raises XMLSyntaxError on inconsistencies.
+ """
+ if self._element_stack:
+ raise XMLSyntaxAssertionError("missing end tags")
+ # TODO: this does not necessarily seem like an error case. Why not just return None?
+ if self._last is None:
+ raise XMLSyntaxAssertionError("missing toplevel element")
+ return self._last
+
+ def data(self, data):
+ """data(self, data)
+
+ Adds text to the current element. The value should be either an
+ 8-bit string containing ASCII text, or a Unicode string.
+ """
+ self._handleSaxData(data)
+
+ def start(self, tag, attrs, nsmap=None):
+ """start(self, tag, attrs, nsmap=None)
+
+ Opens a new element.
+ """
+ if nsmap is None:
+ nsmap = IMMUTABLE_EMPTY_MAPPING
+ return self._handleSaxStart(tag, attrs, nsmap)
+
+ def end(self, tag):
+ """end(self, tag)
+
+ Closes the current element.
+ """
+ element = self._handleSaxEnd(tag)
+ assert self._last.tag == tag,\
+ f"end tag mismatch (expected {self._last.tag}, got {tag})"
+ return element
+
+ def pi(self, target, data=None):
+ """pi(self, target, data=None)
+
+ Creates a processing instruction using the factory, appends it
+ (unless disabled) and returns it.
+ """
+ return self._handleSaxPi(target, data)
+
+ def comment(self, comment):
+ """comment(self, comment)
+
+ Creates a comment using the factory, appends it (unless disabled)
+ and returns it.
+ """
+ return self._handleSaxComment(comment)