You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
169 lines
5.8 KiB
169 lines
5.8 KiB
# support for Schematron validation
|
|
from lxml.includes cimport schematron
|
|
|
|
|
|
cdef class SchematronError(LxmlError):
    """Common base class for every Schematron related error.

    SchematronParseError and SchematronValidateError both derive from it.
    """
|
|
|
|
cdef class SchematronParseError(SchematronError):
    """Raised when an XML document cannot be parsed as a Schematron schema.

    Also raised by the Schematron constructor when neither a tree nor a
    file was given.
    """
|
|
|
|
cdef class SchematronValidateError(SchematronError):
    """Raised when validating an XML document against a Schematron schema
    fails with an internal error.
    """
|
|
|
|
|
|
################################################################################
|
|
# Schematron
|
|
|
|
cdef class Schematron(_Validator):
    """Schematron(self, etree=None, file=None)
    A Schematron validator.

    Pass a root Element or an ElementTree to turn it into a validator.
    Alternatively, pass a filename as keyword argument 'file' to parse from
    the file system.

    Schematron is a less well known, but very powerful schema language.  The main
    idea is to use the capabilities of XPath to put restrictions on the structure
    and the content of XML documents.  Here is a simple example::

      >>> schematron = Schematron(XML('''
      ... <schema xmlns="http://www.ascc.net/xml/schematron" >
      ...   <pattern name="id is the only permitted attribute name">
      ...     <rule context="*">
      ...       <report test="@*[not(name()='id')]">Attribute
      ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
      ...       </report>
      ...     </rule>
      ...   </pattern>
      ... </schema>
      ... '''))

      >>> xml = XML('''
      ... <AAA name="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC color="ccc"/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      0

      >>> xml = XML('''
      ... <AAA id="aaa">
      ...   <BBB id="bbb"/>
      ...   <CCC/>
      ... </AAA>
      ... ''')

      >>> schematron.validate(xml)
      1

    Schematron was added to libxml2 in version 2.6.21.  Before version 2.6.32,
    however, Schematron lacked support for error reporting other than to stderr.
    This version is therefore required to retrieve validation warnings and
    errors in lxml.
    """
    # NOTE(review): the example above prints 0/1, while __call__() below
    # returns True/False -- confirm what validate() returns and update the
    # example output if needed.

    # Compiled schema; NULL until __init__() has parsed it successfully.
    cdef schematron.xmlSchematron* _c_schema
    # Schema document owned by this validator when it was built from a tree;
    # stays NULL when the schema was loaded from a file.
    cdef xmlDoc* _c_schema_doc

    def __cinit__(self):
        # Start from NULL so that __dealloc__() is safe even when __init__()
        # raises before anything was allocated.
        self._c_schema = NULL
        self._c_schema_doc = NULL

    def __init__(self, etree=None, *, file=None):
        """Build the validator from a tree or from a file.

        ``etree`` takes precedence when both arguments are given.

        Raises SchematronError if lxml was compiled without Schematron
        support, SchematronParseError if neither a tree nor a file was
        given or the document is not a valid Schematron schema, and
        MemoryError if no parser context could be created.
        """
        cdef _Document doc
        cdef _Element root_node
        cdef schematron.xmlSchematronParserCtxt* parser_ctxt = NULL
        _Validator.__init__(self)
        if not config.ENABLE_SCHEMATRON:
            raise SchematronError(
                "lxml.etree was compiled without Schematron support.")
        if etree is not None:
            doc = _documentOrRaise(etree)
            root_node = _rootNodeOrRaise(etree)
            # The resulting document is kept on the instance and released
            # again in __dealloc__().
            self._c_schema_doc = _copyDocRoot(doc._c_doc, root_node._c_node)
            parser_ctxt = schematron.xmlSchematronNewDocParserCtxt(self._c_schema_doc)
        elif file is not None:
            filename = _getFilenameForFile(file)
            if filename is None:
                # XXX assume a string object
                filename = file
            filename = _encodeFilename(filename)
            with self._error_log:
                # Presumably installs lxml's document loader for the duration
                # of the libxml2 call and restores the previous one afterwards.
                orig_loader = _register_document_loader()
                parser_ctxt = schematron.xmlSchematronNewParserCtxt(_cstr(filename))
                _reset_document_loader(orig_loader)
        else:
            raise SchematronParseError("No tree or file given")

        if parser_ctxt is NULL:
            # Do not keep a schema document around that no parser will use.
            if self._c_schema_doc is not NULL:
                tree.xmlFreeDoc(self._c_schema_doc)
                self._c_schema_doc = NULL
            raise MemoryError()

        try:
            with self._error_log:
                orig_loader = _register_document_loader()
                self._c_schema = schematron.xmlSchematronParse(parser_ctxt)
                _reset_document_loader(orig_loader)
        finally:
            # The parser context is only needed for the parse run itself.
            schematron.xmlSchematronFreeParserCtxt(parser_ctxt)

        if self._c_schema is NULL:
            raise SchematronParseError(
                "Document is not a valid Schematron schema",
                self._error_log)

    def __dealloc__(self):
        # Release the compiled schema first, then the document this
        # validator owns (if any).
        schematron.xmlSchematronFree(self._c_schema)
        if self._c_schema_doc is not NULL:
            tree.xmlFreeDoc(self._c_schema_doc)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using Schematron.

        Returns True if the document is valid, False if not.

        Raises MemoryError if no validation context could be created and
        SchematronValidateError if libxml2 reports an internal error
        (return value -1).
        """
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_doc
        cdef schematron.xmlSchematronValidCtxt* valid_ctxt
        cdef int ret

        assert self._c_schema is not NULL, "Schematron instance not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = schematron.xmlSchematronNewValidCtxt(
            self._c_schema, schematron.XML_SCHEMATRON_OUT_ERROR)
        if valid_ctxt is NULL:
            raise MemoryError()

        try:
            # Start each validation run with an empty error log.
            self._error_log.clear()
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            schematron.xmlSchematronSetValidStructuredErrors(
                valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
            # Presumably a temporary document rooted at root_node, so that a
            # subtree can be validated -- TODO confirm against _fakeRootDoc().
            c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
            # Release the GIL while libxml2 runs the validation.
            with nogil:
                ret = schematron.xmlSchematronValidateDoc(valid_ctxt, c_doc)
            _destroyFakeDoc(doc._c_doc, c_doc)
        finally:
            schematron.xmlSchematronFreeValidCtxt(valid_ctxt)

        if ret == -1:
            raise SchematronValidateError(
                "Internal error in Schematron validation",
                self._error_log)
        # 0 means the document is valid; any other remaining value is
        # reported as invalid.
        return ret == 0
|