about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/lxml/dtd.pxi
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/dtd.pxi')
-rw-r--r-- .venv/lib/python3.12/site-packages/lxml/dtd.pxi 479
1 files changed, 479 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/lxml/dtd.pxi b/.venv/lib/python3.12/site-packages/lxml/dtd.pxi
new file mode 100644
index 00000000..ee1b3d47
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/lxml/dtd.pxi
@@ -0,0 +1,479 @@
+# support for DTD validation
+from lxml.includes cimport dtdvalid
+
+cdef class DTDError(LxmlError):
+    """Base class for DTD errors.
+
+    Parent class of ``DTDParseError`` and ``DTDValidateError``.
+    """
+
+cdef class DTDParseError(DTDError):
+    """Error while parsing a DTD.
+
+    Raised by the ``DTD`` constructor when the DTD cannot be parsed or when
+    it is called without a usable ``file`` or ``external_id`` argument.
+    """
+
+cdef class DTDValidateError(DTDError):
+    """Error while validating an XML document with a DTD.
+
+    Raised by ``DTD.__call__()`` when the validation run itself fails with an
+    internal error.  A document that is merely invalid does not raise; it
+    makes the call return false instead.
+    """
+
+
+cdef inline int _assertValidDTDNode(node, void *c_node) except -1:
+    # Guard used by the DTD declaration proxies before they touch their C
+    # pointer: fails with an AssertionError (reported to C callers as -1)
+    # when the proxy object `node` has no C structure behind it.
+    assert c_node is not NULL, "invalid DTD proxy at %s" % id(node)
+    return 0
+
+
+@cython.final
+@cython.internal
+@cython.freelist(8)
+cdef class _DTDElementContentDecl:
+    # Read-only proxy for one node of an element declaration's content model.
+    # `_dtd` is the DTD object the proxy was obtained from, `_c_node` the
+    # wrapped C content node.
+    cdef DTD _dtd
+    cdef tree.xmlElementContent* _c_node
+
+    def __repr__(self):
+        cls = self.__class__
+        return "<%s.%s object name=%r type=%r occur=%r at 0x%x>" % (
+            cls.__module__, cls.__name__,
+            self.name, self.type, self.occur, id(self))
+
+    @property
+    def name(self):
+        """The ``name`` field of the content node, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.name)
+
+    @property
+    def type(self):
+        """Node kind: "pcdata", "element", "seq" or "or"; None for any other value."""
+        _assertValidDTDNode(self, self._c_node)
+        cdef int c_type = self._c_node.type
+        if c_type == tree.XML_ELEMENT_CONTENT_PCDATA:
+            return "pcdata"
+        if c_type == tree.XML_ELEMENT_CONTENT_ELEMENT:
+            return "element"
+        if c_type == tree.XML_ELEMENT_CONTENT_SEQ:
+            return "seq"
+        if c_type == tree.XML_ELEMENT_CONTENT_OR:
+            return "or"
+        return None
+
+    @property
+    def occur(self):
+        """Occurrence marker: "once", "opt", "mult" or "plus"; None for any other value."""
+        _assertValidDTDNode(self, self._c_node)
+        cdef int c_occur = self._c_node.ocur
+        if c_occur == tree.XML_ELEMENT_CONTENT_ONCE:
+            return "once"
+        if c_occur == tree.XML_ELEMENT_CONTENT_OPT:
+            return "opt"
+        if c_occur == tree.XML_ELEMENT_CONTENT_MULT:
+            return "mult"
+        if c_occur == tree.XML_ELEMENT_CONTENT_PLUS:
+            return "plus"
+        return None
+
+    @property
+    def left(self):
+        """Proxy for the ``c1`` child of this node, or None if there is none."""
+        _assertValidDTDNode(self, self._c_node)
+        c_child = self._c_node.c1
+        if not c_child:
+            return None
+        child = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
+        child._dtd = self._dtd
+        child._c_node = <tree.xmlElementContent*>c_child
+        return child
+
+    @property
+    def right(self):
+        """Proxy for the ``c2`` child of this node, or None if there is none."""
+        _assertValidDTDNode(self, self._c_node)
+        c_child = self._c_node.c2
+        if not c_child:
+            return None
+        child = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
+        child._dtd = self._dtd
+        child._c_node = <tree.xmlElementContent*>c_child
+        return child
+
+
+@cython.final
+@cython.internal
+@cython.freelist(8)
+cdef class _DTDAttributeDecl:
+    # Read-only proxy for an attribute declaration of a DTD.
+    # `_dtd` is the DTD object the proxy was obtained from, `_c_node` the
+    # wrapped C attribute declaration.
+    cdef DTD _dtd
+    cdef tree.xmlAttribute* _c_node
+
+    def __repr__(self):
+        cls = self.__class__
+        return "<%s.%s object name=%r elemname=%r prefix=%r type=%r default=%r default_value=%r at 0x%x>" % (
+            cls.__module__, cls.__name__,
+            self.name, self.elemname, self.prefix, self.type,
+            self.default, self.default_value, id(self))
+
+    @property
+    def name(self):
+        """The ``name`` field of the declaration, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.name)
+
+    @property
+    def elemname(self):
+        """The ``elem`` field (name of the declaring element), via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.elem)
+
+    @property
+    def prefix(self):
+        """The ``prefix`` field of the declaration, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.prefix)
+
+    @property
+    def type(self):
+        """Attribute type as a lower-case string such as "cdata" or "id".
+
+        None is returned for a type value that is not one of the ten
+        ``XML_ATTRIBUTE_*`` type constants checked here.
+        """
+        _assertValidDTDNode(self, self._c_node)
+        cdef int c_type = self._c_node.atype
+        if c_type == tree.XML_ATTRIBUTE_CDATA:
+            return "cdata"
+        if c_type == tree.XML_ATTRIBUTE_ID:
+            return "id"
+        if c_type == tree.XML_ATTRIBUTE_IDREF:
+            return "idref"
+        if c_type == tree.XML_ATTRIBUTE_IDREFS:
+            return "idrefs"
+        if c_type == tree.XML_ATTRIBUTE_ENTITY:
+            return "entity"
+        if c_type == tree.XML_ATTRIBUTE_ENTITIES:
+            return "entities"
+        if c_type == tree.XML_ATTRIBUTE_NMTOKEN:
+            return "nmtoken"
+        if c_type == tree.XML_ATTRIBUTE_NMTOKENS:
+            return "nmtokens"
+        if c_type == tree.XML_ATTRIBUTE_ENUMERATION:
+            return "enumeration"
+        if c_type == tree.XML_ATTRIBUTE_NOTATION:
+            return "notation"
+        return None
+
+    @property
+    def default(self):
+        """Default kind: "none", "required", "implied" or "fixed"; None for any other value."""
+        _assertValidDTDNode(self, self._c_node)
+        cdef int c_default = self._c_node.def_
+        if c_default == tree.XML_ATTRIBUTE_NONE:
+            return "none"
+        if c_default == tree.XML_ATTRIBUTE_REQUIRED:
+            return "required"
+        if c_default == tree.XML_ATTRIBUTE_IMPLIED:
+            return "implied"
+        if c_default == tree.XML_ATTRIBUTE_FIXED:
+            return "fixed"
+        return None
+
+    @property
+    def default_value(self):
+        """The ``defaultValue`` field of the declaration, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.defaultValue)
+
+    def itervalues(self):
+        """Iterate over the names in the declaration's ``tree`` enumeration list."""
+        _assertValidDTDNode(self, self._c_node)
+        cdef tree.xmlEnumeration *c_value = self._c_node.tree
+        while c_value is not NULL:
+            yield funicode(c_value.name)
+            c_value = c_value.next
+
+    def values(self):
+        """Return the items produced by ``itervalues()`` as a list."""
+        return list(self.itervalues())
+
+
+@cython.final
+@cython.internal
+@cython.freelist(8)
+cdef class _DTDElementDecl:
+    # Read-only proxy for an element declaration of a DTD.
+    # `_dtd` is the DTD object the proxy was obtained from, `_c_node` the
+    # wrapped C element declaration.
+    cdef DTD _dtd
+    cdef tree.xmlElement* _c_node
+
+    def __repr__(self):
+        cls = self.__class__
+        return "<%s.%s object name=%r prefix=%r type=%r at 0x%x>" % (
+            cls.__module__, cls.__name__,
+            self.name, self.prefix, self.type, id(self))
+
+    @property
+    def name(self):
+        """The ``name`` field of the declaration, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.name)
+
+    @property
+    def prefix(self):
+        """The ``prefix`` field of the declaration, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.prefix)
+
+    @property
+    def type(self):
+        """Element type: "undefined", "empty", "any", "mixed" or "element".
+
+        None is returned for any other value of the ``etype`` field.
+        """
+        _assertValidDTDNode(self, self._c_node)
+        cdef int c_type = self._c_node.etype
+        if c_type == tree.XML_ELEMENT_TYPE_UNDEFINED:
+            return "undefined"
+        if c_type == tree.XML_ELEMENT_TYPE_EMPTY:
+            return "empty"
+        if c_type == tree.XML_ELEMENT_TYPE_ANY:
+            return "any"
+        if c_type == tree.XML_ELEMENT_TYPE_MIXED:
+            return "mixed"
+        if c_type == tree.XML_ELEMENT_TYPE_ELEMENT:
+            return "element"
+        return None
+
+    @property
+    def content(self):
+        """Proxy for the root of the declaration's content model, or None."""
+        _assertValidDTDNode(self, self._c_node)
+        cdef tree.xmlElementContent *c_content = self._c_node.content
+        if not c_content:
+            return None
+        decl = <_DTDElementContentDecl>_DTDElementContentDecl.__new__(_DTDElementContentDecl)
+        decl._dtd = self._dtd
+        decl._c_node = c_content
+        return decl
+
+    def iterattributes(self):
+        """Iterate over proxies for the attribute declarations of this element.
+
+        Walks the ``attributes`` list of the element along its ``nexth`` links.
+        """
+        _assertValidDTDNode(self, self._c_node)
+        cdef tree.xmlAttribute *c_attr = self._c_node.attributes
+        while c_attr:
+            decl = <_DTDAttributeDecl>_DTDAttributeDecl.__new__(_DTDAttributeDecl)
+            decl._dtd = self._dtd
+            decl._c_node = c_attr
+            yield decl
+            c_attr = c_attr.nexth
+
+    def attributes(self):
+        """Return the items produced by ``iterattributes()`` as a list."""
+        return list(self.iterattributes())
+
+
+@cython.final
+@cython.internal
+@cython.freelist(8)
+cdef class _DTDEntityDecl:
+    # Read-only proxy for an entity declaration of a DTD.
+    # `_dtd` is the DTD object the proxy was obtained from, `_c_node` the
+    # wrapped C entity declaration.
+    cdef DTD _dtd
+    cdef tree.xmlEntity* _c_node
+
+    def __repr__(self):
+        cls = self.__class__
+        return "<%s.%s object name=%r at 0x%x>" % (
+            cls.__module__, cls.__name__, self.name, id(self))
+
+    @property
+    def name(self):
+        """The ``name`` field of the entity, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.name)
+
+    @property
+    def orig(self):
+        """The ``orig`` field of the entity, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.orig)
+
+    @property
+    def content(self):
+        """The ``content`` field of the entity, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.content)
+
+    @property
+    def system_url(self):
+        """The ``SystemID`` field of the entity, via ``funicodeOrNone()``."""
+        _assertValidDTDNode(self, self._c_node)
+        return funicodeOrNone(self._c_node.SystemID)
+
+
+################################################################################
+# DTD
+
+cdef class DTD(_Validator):
+    """DTD(self, file=None, external_id=None)
+    A DTD validator.
+
+    Can load from filesystem directly given a filename or file-like object.
+    Alternatively, pass the keyword parameter ``external_id`` to load from a
+    catalog.
+
+    Raises ``DTDParseError`` if neither argument is given, if ``file`` is of
+    an unsupported type, or if the DTD cannot be parsed.
+    """
+    # the parsed DTD; owned by this object and freed in __dealloc__()
+    cdef tree.xmlDtd* _c_dtd
+
+    def __init__(self, file=None, *, external_id=None):
+        _Validator.__init__(self)
+        if file is not None:
+            file = _getFSPathOrObject(file)
+            if _isString(file):
+                file = _encodeFilename(file)
+                with self._error_log:
+                    orig_loader = _register_document_loader()
+                    self._c_dtd = xmlparser.xmlParseDTD(NULL, _xcstr(file))
+                    _reset_document_loader(orig_loader)
+            elif hasattr(file, 'read'):
+                orig_loader = _register_document_loader()
+                try:
+                    self._c_dtd = _parseDtdFromFilelike(file)
+                finally:
+                    # _parseDtdFromFilelike() raises on failure; always put
+                    # the previous document loader back, even in that case
+                    _reset_document_loader(orig_loader)
+            else:
+                raise DTDParseError("file must be a filename, file-like or path-like object")
+        elif external_id is not None:
+            external_id_utf = _utf8(external_id)
+            with self._error_log:
+                orig_loader = _register_document_loader()
+                self._c_dtd = xmlparser.xmlParseDTD(<const_xmlChar*>external_id_utf, NULL)
+                _reset_document_loader(orig_loader)
+        else:
+            raise DTDParseError("either filename or external ID required")
+
+        if self._c_dtd is NULL:
+            raise DTDParseError(
+                self._error_log._buildExceptionMessage("error parsing DTD"),
+                self._error_log)
+
+    @property
+    def name(self):
+        """The ``name`` field of the DTD, or None if no DTD is loaded."""
+        if self._c_dtd is NULL:
+            return None
+        return funicodeOrNone(self._c_dtd.name)
+
+    @property
+    def external_id(self):
+        """The ``ExternalID`` field of the DTD, or None if no DTD is loaded."""
+        if self._c_dtd is NULL:
+            return None
+        return funicodeOrNone(self._c_dtd.ExternalID)
+
+    @property
+    def system_url(self):
+        """The ``SystemID`` field of the DTD, or None if no DTD is loaded."""
+        if self._c_dtd is NULL:
+            return None
+        return funicodeOrNone(self._c_dtd.SystemID)
+
+    def iterelements(self):
+        """Iterate over proxies for the element declarations of this DTD.
+
+        Yields nothing if no DTD is loaded.
+        """
+        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
+        while c_node is not NULL:
+            if c_node.type == tree.XML_ELEMENT_DECL:
+                # same proxy creation idiom as the other declaration proxies
+                node = <_DTDElementDecl>_DTDElementDecl.__new__(_DTDElementDecl)
+                node._dtd = self
+                node._c_node = <tree.xmlElement*>c_node
+                yield node
+            c_node = c_node.next
+
+    def elements(self):
+        """Return the items produced by ``iterelements()`` as a list."""
+        return list(self.iterelements())
+
+    def iterentities(self):
+        """Iterate over proxies for the entity declarations of this DTD.
+
+        Yields nothing if no DTD is loaded.
+        """
+        cdef tree.xmlNode *c_node = self._c_dtd.children if self._c_dtd is not NULL else NULL
+        while c_node is not NULL:
+            if c_node.type == tree.XML_ENTITY_DECL:
+                # same proxy creation idiom as the other declaration proxies
+                node = <_DTDEntityDecl>_DTDEntityDecl.__new__(_DTDEntityDecl)
+                node._dtd = self
+                node._c_node = <tree.xmlEntity*>c_node
+                yield node
+            c_node = c_node.next
+
+    def entities(self):
+        """Return the items produced by ``iterentities()`` as a list."""
+        return list(self.iterentities())
+
+    def __dealloc__(self):
+        tree.xmlFreeDtd(self._c_dtd)
+
+    def __call__(self, etree):
+        """__call__(self, etree)
+
+        Validate doc using the DTD.
+
+        Returns true if the document is valid, false if not.
+
+        Raises ``DTDError`` if no validation context could be created and
+        ``DTDValidateError`` if the validation run did not produce a result.
+        """
+        cdef _Document doc
+        cdef _Element root_node
+        cdef xmlDoc* c_doc
+        cdef dtdvalid.xmlValidCtxt* valid_ctxt
+        # stays -1 unless xmlValidateDtd() returned a result
+        cdef int ret = -1
+
+        assert self._c_dtd is not NULL, "DTD not initialised"
+        doc = _documentOrRaise(etree)
+        root_node = _rootNodeOrRaise(etree)
+
+        valid_ctxt = dtdvalid.xmlNewValidCtxt()
+        if valid_ctxt is NULL:
+            raise DTDError("Failed to create validation context")
+
+        # work around error reporting bug in libxml2 <= 2.9.1 (and later?)
+        # https://bugzilla.gnome.org/show_bug.cgi?id=724903
+        valid_ctxt.error = <dtdvalid.xmlValidityErrorFunc>_nullGenericErrorFunc
+        valid_ctxt.userData = NULL
+
+        try:
+            with self._error_log:
+                # validate through a temporary document built around root_node
+                c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
+                ret = dtdvalid.xmlValidateDtd(valid_ctxt, c_doc, self._c_dtd)
+                _destroyFakeDoc(doc._c_doc, c_doc)
+        finally:
+            dtdvalid.xmlFreeValidCtxt(valid_ctxt)
+
+        if ret == -1:
+            raise DTDValidateError("Internal error in DTD validation",
+                                   self._error_log)
+        return ret == 1
+
+
+cdef tree.xmlDtd* _parseDtdFromFilelike(file) except NULL:
+    # Parse a DTD from the file-like object `file` and return the resulting
+    # C structure.  Raises DTDParseError (carrying a private error log) if
+    # the reader did not produce a DTD.
+    cdef tree.xmlDtd* c_dtd = NULL
+    cdef _ExceptionContext exc_context = _ExceptionContext()
+    cdef _FileReaderContext dtd_parser = _FileReaderContext(file, exc_context, None)
+    cdef _ErrorLog error_log = _ErrorLog()
+
+    with error_log:
+        c_dtd = dtd_parser._readDtd()
+
+    # presumably re-raises an exception captured by the reader context while
+    # reading from `file` -- confirm against _ExceptionContext
+    exc_context._raise_if_stored()
+    if c_dtd is not NULL:
+        return c_dtd
+    raise DTDParseError("error parsing DTD", error_log)
+
+cdef DTD _dtdFactory(tree.xmlDtd* c_dtd):
+    # Wrap a private copy of `c_dtd` in a new DTD object; returns None when
+    # there is no DTD to wrap.
+    # do not run through DTD.__init__()!
+    if c_dtd is NULL:
+        return None
+    cdef DTD validator = DTD.__new__(DTD)
+    validator._c_dtd = _copyDtd(c_dtd)
+    _Validator.__init__(validator)
+    return validator
+
+
+cdef tree.xmlDtd* _copyDtd(tree.xmlDtd* c_orig_dtd) except NULL:
+    """
+    Return a copy of the given DTD, raising MemoryError if copying fails.
+
+    libxml2 (currently) fails to set up the element->attributes links when
+    copying DTDs, so they are rebuilt here for every attribute declaration
+    found among the children of the copy.
+    """
+    cdef tree.xmlDtd* c_copy = tree.xmlCopyDtd(c_orig_dtd)
+    if c_copy is NULL:
+        raise MemoryError()
+    cdef tree.xmlNode* c_child = c_copy.children
+    while c_child is not NULL:
+        if c_child.type == tree.XML_ATTRIBUTE_DECL:
+            _linkDtdAttribute(c_copy, <tree.xmlAttribute*>c_child)
+        c_child = c_child.next
+    return c_copy
+
+
+cdef void _linkDtdAttribute(tree.xmlDtd* c_dtd, tree.xmlAttribute* c_attr) noexcept:
+    """
+    Create the link to the DTD attribute declaration from the corresponding
+    element declaration.
+
+    The element declaration is looked up in ``c_dtd`` by the element name
+    stored in ``c_attr``.  Its attribute declarations form a singly linked
+    list chained through ``nexth``; ``c_attr`` is spliced into that list
+    unless it is already found there.
+    """
+    c_elem = dtdvalid.xmlGetDtdElementDesc(c_dtd, c_attr.elem)
+    if not c_elem:
+        # no such element? something is wrong with the DTD ...
+        return
+    c_pos = c_elem.attributes
+    if not c_pos:
+        # empty list: c_attr becomes its only entry
+        c_elem.attributes = c_attr
+        c_attr.nexth = NULL
+        return
+    # libxml2 keeps namespace declarations first, and we need to make
+    # sure we don't re-insert attributes that are already there
+    if _isDtdNsDecl(c_attr):
+        if not _isDtdNsDecl(c_pos):
+            # list has no namespace declarations yet: insert at the head
+            c_elem.attributes = c_attr
+            c_attr.nexth = c_pos
+            return
+        # advance to the last entry of the leading run of namespace
+        # declarations, stopping early if c_attr itself is encountered
+        while c_pos != c_attr and c_pos.nexth and _isDtdNsDecl(c_pos.nexth):
+            c_pos = c_pos.nexth
+    else:
+        # append at end
+        while c_pos != c_attr and c_pos.nexth:
+            c_pos = c_pos.nexth
+    if c_pos == c_attr:
+        # already linked, nothing to do
+        return
+    # splice c_attr in directly after c_pos
+    c_attr.nexth = c_pos.nexth
+    c_pos.nexth = c_attr
+
+
+cdef bint _isDtdNsDecl(tree.xmlAttribute* c_attr) noexcept:
+    # True if the attribute declaration is a namespace declaration, i.e. its
+    # name is "xmlns" or it has the prefix "xmlns".
+    if cstring_h.strcmp(<const_char*>c_attr.name, "xmlns") == 0:
+        return True
+    if c_attr.prefix is NULL:
+        return False
+    return cstring_h.strcmp(<const_char*>c_attr.prefix, "xmlns") == 0