diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/lxml/schematron.pxi')
-rw-r--r-- | .venv/lib/python3.12/site-packages/lxml/schematron.pxi | 168 |
1 files changed, 168 insertions, 0 deletions
# support for Schematron validation
from lxml.includes cimport schematron


cdef class SchematronError(LxmlError):
    """Base class of all Schematron errors.
    """

cdef class SchematronParseError(SchematronError):
    """Error while parsing an XML document as Schematron schema.
    """

cdef class SchematronValidateError(SchematronError):
    """Error while validating an XML document with a Schematron schema.
    """


################################################################################
# Schematron

cdef class Schematron(_Validator):
    """Schematron(self, etree=None, file=None)
    A Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.  The main
    idea is to use the capabilities of XPath to put restrictions on the structure
    and the content of XML documents.  Here is a simple example::

      >>> schematron = Schematron(XML('''
      ... <schema xmlns="http://www.ascc.net/xml/schematron" >
      ...   <pattern name="id is the only permitted attribute name">
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>
      ... '''))

      >>> xml = XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      0

      >>> xml = XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      1

    Schematron was added to libxml2 in version 2.6.21.  Before version 2.6.32,
    however, Schematron lacked support for error reporting other than to stderr.
    This version is therefore required to retrieve validation warnings and
    errors in lxml.
    """
    # Compiled schema produced by xmlSchematronParse(); released in __dealloc__.
    cdef schematron.xmlSchematron* _c_schema
    # Copy of the schema document made when the schema is built from a tree;
    # stays NULL when the schema is loaded from a file.  Released in __dealloc__.
    cdef xmlDoc* _c_schema_doc

    def __cinit__(self):
        # Start from NULL so that __dealloc__ is safe even if __init__ fails.
        self._c_schema_doc = NULL
        self._c_schema = NULL

    def __init__(self, etree=None, *, file=None):
        cdef _Document document
        cdef _Element root
        cdef schematron.xmlSchematronParserCtxt* c_parser_ctxt = NULL

        _Validator.__init__(self)
        if not config.ENABLE_SCHEMATRON:
            raise SchematronError(
                "lxml.etree was compiled without Schematron support.")

        # Neither source given: nothing to build a schema from.
        if etree is None and file is None:
            raise SchematronParseError("No tree or file given")

        if etree is not None:
            # A tree takes precedence over 'file' when both are passed.
            document = _documentOrRaise(etree)
            root = _rootNodeOrRaise(etree)
            self._c_schema_doc = _copyDocRoot(document._c_doc, root._c_node)
            c_parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(
                self._c_schema_doc)
        else:
            filename = _getFilenameForFile(file)
            if filename is None:
                # XXX assume a string object
                filename = file
            filename = _encodeFilename(filename)
            with self._error_log:
                previous_loader = _register_document_loader()
                c_parser_ctxt = schematron.xmlSchematronNewParserCtxt(
                    _cstr(filename))
                _reset_document_loader(previous_loader)

        if c_parser_ctxt is NULL:
            # No parser context: drop the copied schema document (if any)
            # right away instead of waiting for __dealloc__.
            if self._c_schema_doc is not NULL:
                tree.xmlFreeDoc(self._c_schema_doc)
                self._c_schema_doc = NULL
            raise MemoryError()

        try:
            with self._error_log:
                previous_loader = _register_document_loader()
                self._c_schema = schematron.xmlSchematronParse(c_parser_ctxt)
                _reset_document_loader(previous_loader)
        finally:
            # The parser context is only needed while parsing the schema.
            schematron.xmlSchematronFreeParserCtxt(c_parser_ctxt)

        if self._c_schema is NULL:
            raise SchematronParseError(
                "Document is not a valid Schematron schema",
                self._error_log)

    def __dealloc__(self):
        schematron.xmlSchematronFree(self._c_schema)
        if self._c_schema_doc is not NULL:
            tree.xmlFreeDoc(self._c_schema_doc)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using Schematron.

        Returns true if document is valid, false if not.

        Raises MemoryError if no validation context could be created and
        SchematronValidateError if libxml2 reports an internal error (-1).
        """
        cdef _Document document
        cdef _Element root
        cdef xmlDoc* c_doc
        cdef schematron.xmlSchematronValidCtxt* c_valid_ctxt
        cdef int result

        assert self._c_schema is not NULL, "Schematron instance not initialised"
        document = _documentOrRaise(etree)
        root = _rootNodeOrRaise(etree)

        c_valid_ctxt = schematron.xmlSchematronNewValidCtxt(
            self._c_schema, schematron.XML_SCHEMATRON_OUT_ERROR)
        if c_valid_ctxt is NULL:
            raise MemoryError()

        try:
            self._error_log.clear()
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            schematron.xmlSchematronSetValidStructuredErrors(
                c_valid_ctxt,
                <xmlerror.xmlStructuredErrorFunc> _receiveError,
                <void*>self._error_log)
            # Validate against the document obtained from _fakeRootDoc() and
            # hand it back to _destroyFakeDoc() once validation is done.
            c_doc = _fakeRootDoc(document._c_doc, root._c_node)
            with nogil:
                result = schematron.xmlSchematronValidateDoc(c_valid_ctxt, c_doc)
            _destroyFakeDoc(document._c_doc, c_doc)
        finally:
            schematron.xmlSchematronFreeValidCtxt(c_valid_ctxt)

        if result == -1:
            raise SchematronValidateError(
                "Internal error in Schematron validation",
                self._error_log)
        # 0 means "valid"; any other non-error result means "invalid".
        return result == 0