1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
# support for Schematron validation
from lxml.includes cimport schematron
cdef class SchematronError(LxmlError):
    """Common ancestor of every Schematron-related exception."""
cdef class SchematronParseError(SchematronError):
    """Raised when an XML document cannot be parsed as a Schematron schema."""
cdef class SchematronValidateError(SchematronError):
    """Raised when validating an XML document against a Schematron schema
    fails with an error.
    """
################################################################################
# Schematron
cdef class Schematron(_Validator):
    """Schematron(self, etree=None, file=None)
    A Schematron validator.

    Build the validator either from a root Element / ElementTree passed as
    ``etree``, or from a schema on the file system named by the keyword
    argument 'file'.

    Schematron is a less well known, but very powerful schema language.  It
    relies on XPath expressions to constrain both the structure and the
    content of XML documents.  Here is a simple example::

      >>> schematron = Schematron(XML('''
      ... <schema xmlns="http://www.ascc.net/xml/schematron" >
      ...   <pattern name="id is the only permitted attribute name">
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>
      ... '''))

      >>> xml = XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      0

      >>> xml = XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      1

    Schematron was added to libxml2 in version 2.6.21.  Before version 2.6.32,
    however, Schematron lacked support for error reporting other than to
    stderr.  That later version is therefore required to retrieve validation
    warnings and errors in lxml.
    """
    # Compiled schema handle, plus the private document copy it was built
    # from when the schema came from an in-memory tree (NULL otherwise).
    cdef schematron.xmlSchematron* _c_schema
    cdef xmlDoc* _c_schema_doc

    def __cinit__(self):
        # Start from NULL so that __dealloc__ is safe even if __init__
        # never runs or fails part-way through.
        self._c_schema_doc = NULL
        self._c_schema = NULL

    def __init__(self, etree=None, *, file=None):
        """Compile a Schematron schema from a tree or from a file.

        ``etree`` takes precedence over ``file`` when both are given.

        Raises SchematronError if lxml.etree was built without Schematron
        support, SchematronParseError if neither source is given or the
        schema cannot be compiled, and MemoryError if no parser context
        could be created.
        """
        cdef _Document schema_doc
        cdef _Element schema_root
        cdef xmlNode* c_node
        cdef char* c_href
        cdef schematron.xmlSchematronParserCtxt* c_parser = NULL

        _Validator.__init__(self)

        if not config.ENABLE_SCHEMATRON:
            raise SchematronError(
                "lxml.etree was compiled without Schematron support.")
        if etree is None and file is None:
            raise SchematronParseError("No tree or file given")

        if etree is not None:
            # Compile from a private copy of the given (sub)tree.
            schema_doc = _documentOrRaise(etree)
            schema_root = _rootNodeOrRaise(etree)
            self._c_schema_doc = _copyDocRoot(
                schema_doc._c_doc, schema_root._c_node)
            c_parser = schematron.xmlSchematronNewDocParserCtxt(
                self._c_schema_doc)
        else:
            source_name = _getFilenameForFile(file)
            if source_name is None:
                # XXX assume a string object
                source_name = file
            encoded_name = _encodeFilename(source_name)
            with self._error_log:
                saved_loader = _register_document_loader()
                c_parser = schematron.xmlSchematronNewParserCtxt(
                    _cstr(encoded_name))
                _reset_document_loader(saved_loader)

        if c_parser is NULL:
            # No context was created: release the copied schema document
            # right away instead of leaving it to __dealloc__.
            if self._c_schema_doc is not NULL:
                tree.xmlFreeDoc(self._c_schema_doc)
                self._c_schema_doc = NULL
            raise MemoryError()

        try:
            with self._error_log:
                saved_loader = _register_document_loader()
                self._c_schema = schematron.xmlSchematronParse(c_parser)
                _reset_document_loader(saved_loader)
        finally:
            # The parser context is only needed while compiling.
            schematron.xmlSchematronFreeParserCtxt(c_parser)

        if self._c_schema is NULL:
            raise SchematronParseError(
                "Document is not a valid Schematron schema",
                self._error_log)

    def __dealloc__(self):
        # The schema handle is freed unconditionally (as in the original
        # code, which passes it on even when NULL); the document copy only
        # exists when the schema was built from an in-memory tree.
        schematron.xmlSchematronFree(self._c_schema)
        if self._c_schema_doc is not NULL:
            tree.xmlFreeDoc(self._c_schema_doc)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using Schematron.

        Returns true if document is valid, false if not.  Raises
        SchematronValidateError if the validation call reports an internal
        error, and MemoryError if no validation context could be created.
        """
        cdef _Document target_doc
        cdef _Element target_root
        cdef xmlDoc* c_fake_doc
        cdef schematron.xmlSchematronValidCtxt* c_valid
        cdef int status

        assert self._c_schema is not NULL, "Schematron instance not initialised"
        target_doc = _documentOrRaise(etree)
        target_root = _rootNodeOrRaise(etree)

        c_valid = schematron.xmlSchematronNewValidCtxt(
            self._c_schema, schematron.XML_SCHEMATRON_OUT_ERROR)
        if c_valid is NULL:
            raise MemoryError()

        try:
            self._error_log.clear()
            # Need a cast here because older libxml2 releases do not use
            # 'const' in the functype.
            schematron.xmlSchematronSetValidStructuredErrors(
                c_valid,
                <xmlerror.xmlStructuredErrorFunc> _receiveError,
                <void*>self._error_log)
            # Validate against a temporary document rooted at the given
            # node; it is torn down again right after the call.
            c_fake_doc = _fakeRootDoc(target_doc._c_doc, target_root._c_node)
            with nogil:
                status = schematron.xmlSchematronValidateDoc(
                    c_valid, c_fake_doc)
            _destroyFakeDoc(target_doc._c_doc, c_fake_doc)
        finally:
            schematron.xmlSchematronFreeValidCtxt(c_valid)

        if status == -1:
            raise SchematronValidateError(
                "Internal error in Schematron validation",
                self._error_log)
        # A status of 0 means valid; any other remaining value means invalid.
        return status == 0
|