Nektar++
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Pages
validator.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 
3 """Copyright (c) 2005-2016, University of Oxford.
4 All rights reserved.
5 
6 University of Oxford means the Chancellor, Masters and Scholars of the
7 University of Oxford, having an administrative office at Wellington
8 Square, Oxford OX1 2JD, UK.
9 
10 This file is part of Chaste.
11 
12 Redistribution and use in source and binary forms, with or without
13 modification, are permitted provided that the following conditions are met:
14  * Redistributions of source code must retain the above copyright notice,
15  this list of conditions and the following disclaimer.
16  * Redistributions in binary form must reproduce the above copyright notice,
17  this list of conditions and the following disclaimer in the documentation
18  and/or other materials provided with the distribution.
19  * Neither the name of the University of Oxford nor the names of its
20  contributors may be used to endorse or promote products derived from this
21  software without specific prior written permission.
22 
23 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
27 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
29 GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
30 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
31 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32 OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33 """
34 
35 # Validator for CellML 1.0
36 # Author: Jonathan Cooper
37 
38 # We want 1/2==0.5
39 from __future__ import division
40 
41 import codecs
42 import optparse
43 import os
44 import re
45 import subprocess
46 import sys
47 
# PyCml lives alongside this script: put that directory at the front of the
# module search path so the imports that follow resolve against it.
pycml_path = os.path.dirname(os.path.realpath(__file__))
sys.path.insert(0, pycml_path)
51 
52 # Common CellML processing stuff
53 import pycml
54 from pycml import * # Put contents in the local namespace as well
55 
56 __version__ = "$Revision: 25790 $"[11:-2]
57 
58 
59 
class ValidatorError(Exception):
    """Base class for errors trying to run validator stages.

    The value passed to the constructor (normally a message string) is kept
    on the ``value`` attribute; ``str()`` of the exception is the ``repr``
    of that value.
    """

    def __init__(self, value):
        # Initialise the base class as well, so that ``args`` is populated
        # like for any other exception (needed e.g. for copying/pickling).
        Exception.__init__(self, value)
        self.value = value

    def __str__(self):
        return repr(self.value)
66 
67 
68 
class CellMLValidator(object):
    """Validator for CellML 1.0 documents.

    Validation has two stages: the document is first checked against the
    CellML RELAX NG schema (using lxml, or RVP if lxml cannot be used), and
    is then loaded with Amara so that rules which cannot be expressed in the
    schema can be checked by Python code.
    """

    def __init__(self, create_relaxng_validator=True):
        """Initialise a validator for CellML files.

        Unless create_relaxng_validator is False, each RELAX NG backend is
        tried in turn and the first one that starts successfully is kept.

        Raises ValidatorError if no backend could be started.
        """
        # Create validator from RELAX NG schema
        self.relaxng_validator = None
        if create_relaxng_validator:
            schema_base = os.path.join(pycml_path, 'cellml1.0')
            run_errors = []
            for klass in [LxmlRelaxngValidator, RvpRelaxngValidator]:
                try:
                    self.relaxng_validator = klass(schema_base)
                except ValidatorError as e:
                    run_errors.append(str(e))
                else:
                    break
            if not self.relaxng_validator:
                msg = '\n\t'.join(["Unable to run a RELAX NG validator. Please install lxml or rvp."]
                                  + run_errors)
                raise ValidatorError(msg)

    def quit(self):
        """
        Since using __del__ is precarious, we provide this method to allow
        the RVP process to be killed cleanly.  Call it when the validator
        is finished with, or you'll get an interesting error when the program
        terminates (if using RVP).
        """
        if self.relaxng_validator:
            self.relaxng_validator.quit()
        return

    @staticmethod
    def setup_logging(show_errors=True, error_stream=sys.stderr,
                      show_warnings=True, warning_stream=sys.stderr,
                      space_errors=False, loglevel=logging.WARNING,
                      **kwargs):
        """Set up loggers for validation errors/warnings.

        Set show_errors or show_warnings to False to suppress the output of
        validation error or warning messages, respectively.  When not
        suppressed, the messages will be output to the streams given by
        error_stream and warning_stream; these should be file-like objects.

        If space_errors is True, a blank line will be inserted between
        each message.

        loglevel is the level set on the 'validator' logger.  Any further
        keyword arguments are accepted and ignored, so callers can pass one
        dictionary of options to both this method and the model validation.

        Returns the (error_handler, warning_handler) pair that was attached
        to the logger, for later removal with cleanup_logging.
        """
        logger = logging.getLogger('validator')
        logger.setLevel(loglevel)
        if space_errors:
            formatter = logging.Formatter(fmt="%(message)s\n")
        else:
            formatter = logging.Formatter(fmt="%(message)s")
        error_handler = logging.StreamHandler(error_stream)
        error_handler.setLevel(logging.ERROR)
        error_handler.setFormatter(formatter)
        warning_handler = logging.StreamHandler(warning_stream)
        warning_handler.addFilter(OnlyWarningsFilter())
        warning_handler.setFormatter(formatter)
        # Suppression is done by raising the handler threshold rather than by
        # leaving the handler off the logger.
        if not show_errors:
            error_handler.setLevel(logging.CRITICAL)
        if not show_warnings:
            warning_handler.setLevel(logging.CRITICAL)
        logger.addHandler(error_handler)
        logger.addHandler(warning_handler)
        return error_handler, warning_handler

    @staticmethod
    def cleanup_logging(handlers):
        """Flush the given handlers and detach them from the 'validator' logger."""
        logger = logging.getLogger('validator')
        for handler in handlers:
            handler.flush()
            logger.removeHandler(handler)

    def validate(self, source, return_doc=False, assume_valid=False, **kw):
        """Validate the given document.

        source should be a file-like object, URI, local file name,
        or '-' for standard input.  If a file-like object, it must support
        the seek method to reset it.

        If return_doc is True then the result is a tuple (valid, document),
        where if valid==True then document is an Amara binding of the CellML
        document.
        Otherwise just return True iff the document is valid.

        If xml_context is True, then the failing XML tree will be displayed
        with every units error.

        The assume_valid option allows you to skip RELAX NG validation, along
        with many of the checks in the Python code.  This is useful for speeding
        transformation of models that are known to pass these checks.

        See cellml_model.validate and setup_logging for other keyword arguments.
        """
        logging_info = CellMLValidator.setup_logging(**kw)
        try:
            DEBUG('validator', 'CellML Validator version', __version__)
            res = True
            # Get stream of CellML document
            stream = None
            if source == '-':
                stream = sys.stdin
            elif hasattr(source, 'read'):
                stream = source
            elif bt.Uri.IsAbsolute(source):
                stream = bt.Url.UrlOpen(source)
            elif not os.path.isfile(source):
                res = False
                logging.getLogger('validator').error('File ' + source + ' does not exist.')
            else:
                stream = open(source, 'r')
            # Validate against RELAX NG schema
            try:
                if res and not assume_valid:
                    DEBUG('validator', 'Starting RELAX NG validation')
                    res = self.relaxng_validator.validate(stream)
                    if stream is source:
                        # Rewind the caller's stream so it can be parsed again below
                        source.seek(0)
                    DEBUG('validator', 'Finished RELAX NG:', res)
            finally:
                # Close any stream we opened ourselves, whether or not the
                # schema validation ran (assume_valid) or succeeded.
                if (stream is not None and stream is not source
                        and stream is not sys.stdin):
                    stream.close()

            # Check further rules that can't be expressed by a (RELAX NG) schema.
            # We use our own Python code for this.
            if res:
                DEBUG('validator', 'Loading model with Amara')
                doc = amara_parse_cellml(source)
                DEBUG('validator', 'Validating loaded model')
                res = doc.model.validate(assume_valid=assume_valid, **kw)
                DEBUG('validator', 'Validation complete:', res)
            else:
                doc = None
        finally:
            # Flush logger & remove handlers, even if validation raised;
            # otherwise every failed call would leave two more handlers on
            # the shared 'validator' logger.
            CellMLValidator.cleanup_logging(logging_info)

        # Return result
        if return_doc:
            return (res, doc)
        if doc:
            doc.model.clean_up()
        return res
213 
214 
class LxmlRelaxngValidator(object):
    """
    A RELAX NG validator built on top of lxml (http://lxml.de/validation.html#relaxng).
    Can validate against schemas written in the XML syntax.
    """

    def __init__(self, schemaBase):
        """Initialise the RELAX NG validator.

        Parses the schema into memory, and constructs lxml's validator object.
        We are passed the path to the schema with no extension; '.rng' is
        appended to find the schema file.

        Raises ValidatorError if lxml cannot be imported.
        """
        try:
            from lxml import etree
        except ImportError as e:
            raise ValidatorError("Unable to import lxml: " + str(e))
        fp = open(schemaBase + '.rng', 'r')
        try:
            schema_doc = etree.parse(fp)
        finally:
            # The schema is fully parsed at this point; don't leak the handle.
            fp.close()
        self._validator = etree.RelaxNG(schema_doc)

    def validate(self, stream):
        """Validate an XML document, returning a boolean.

        stream should be a file-like object containing the document to be validated.
        Every entry in lxml's error log is reported through the 'validator' logger.
        Returns True iff the document was valid.
        """
        from lxml import etree
        doc = etree.parse(stream)
        res = self._validator.validate(doc)
        # Report error details via the logger
        logger = logging.getLogger('validator')
        for e in self._validator.error_log:
            logger.error(e)
        return res

    def quit(self):
        """Providing for compatibility with RvpRelaxngValidator; does nothing."""
        pass
251 
252 
253 class RvpRelaxngValidator(object):
254  """
255  A RELAX NG validator built on top of RVP (http://www.davidashen.net/rnv.html).
256  Can validate against schemas written in the compact syntax.
257  """
259  """Raised if the response from RVP is not understood."""
260  pass
261 
def __init__(self, schemaBase):
    """Initialise the RELAX NG validator by launching RVP as a child process.

    schemaBase is the path of the schema without its extension; '.rnc'
    (compact syntax) is appended to obtain the file handed to RVP.

    Raises RvpProtocolError if the rvp executable cannot be started.
    """
    # Matches any single character other than tab, newline, CR or space.
    self._ws = re.compile('[^\t\n\r ]')
    rnc_path = schemaBase + '.rnc'
    try:
        child = subprocess.Popen(['rvp', rnc_path],
                                 stdin=subprocess.PIPE,
                                 stdout=subprocess.PIPE,
                                 close_fds=True)
    except OSError as e:
        raise self.RvpProtocolError("Failed to run rvp for CellML syntax validation: " + str(e))
    self._rvp_pipe = child
    # Communication goes through os.read / os.write, so keep the raw
    # file descriptors of the child's pipes.
    self._rvpin = child.stdin.fileno()
    self._rvpout = child.stdout.fileno()
    # The Expat module is imported lazily and kept on the instance.
    from xml.parsers import expat
    self.expat = expat
280 
281 ## def __del__(self):
282 ## """
283 ## Tell our RVP process to quit.
284 ## This doesn't work well, since __del__ isn't necessarily called at
285 ## program exit. Hence, there is a manual quit() method.
286 ## """
287 ## self.quit()
def quit(self):
    """Shut the validator down; call this once it is no longer needed.

    Sends the 'quit' command to the associated RVP process and returns
    the value of its reply.  Skipping this call will probably result in
    an error when your program exits.
    """
    self._send('quit')
    reply = self._resp()
    return reply
295 
def validate(self, stream):
    """Validate an XML document, returning a boolean.

    stream should be a file-like object containing the document to
    be validated.  The document is parsed with Expat, whose callbacks
    drive the RVP conversation; an Expat parse error is itself recorded
    as a validation error.
    Returns True iff the document was valid.
    """
    # Reset all per-document state
    self._text = ''
    self._errors = False
    self._error_messages = []
    self._prevline = self._prevcol = -1
    self._pat = self._start()
    # Build a parser wired to our handlers
    xml_parser = self.expat.ParserCreate(namespace_separator=':')
    xml_parser.StartElementHandler = self._start_element
    xml_parser.EndElementHandler = self._end_element
    xml_parser.CharacterDataHandler = self._characters
    self._parser = xml_parser
    # Parse & validate
    try:
        xml_parser.ParseFile(stream)
    except self.expat.ExpatError as e:
        self._errors = True
        self._error_found(e.lineno, e.offset, str(e))
    return not self._errors
322 
324  """
325  Return the list of all errors found while validating
326  the current file.
327  """
328  return self._error_messages
329 
330  def _error_found(self, line, col, msg):
331  report = "%d,%d:%s" % (line, col, msg.strip())
332  logging.getLogger('validator').error(report)
333  self._error_messages.append(report)
334 
335  # RVP protocol methods
336  def _start_tag_open(self, cur, name):
337  self._send('start-tag-open '+cur+' '+name)
338  return self._resp()
339  def _attribute(self, cur, name, val):
340  self._send('attribute '+cur+' '+name+' '+val)
341  return self._resp()
342  def _start_tag_close(self, cur, name):
343  self._send('start-tag-close '+cur+' '+name)
344  return self._resp()
345  def _end_tag(self, cur, name):
346  self._send('end-tag '+cur+' '+name)
347  return self._resp()
348  def _textonly(self, cur, text):
349  self._send('text '+cur+' '+text)
350  return self._resp()
351  def _mixed(self, cur, text):
352  """
353  In mixed content, whitespace is discarded, and any non-whitespace
354  is counted as equal.
355  """
356  if self._ws.search(text):
357  self._send('mixed '+cur+' .')
358  return self._resp()
359  else:
360  return cur
361  def _start(self, grammar = '0'):
362  self._send('start '+grammar)
363  return self._resp()
364 
365  # Low-level communication with RVP
366  def _send(self, s):
367  """
368  Terminate string with zero, encode in UTF-8 and send to RVP.
369  """
370  os.write(self._rvpin, s.encode('UTF-8') + '\0')
371  def _recv(self):
372  """
373  Receive a zero-terminated response from RVP; drop zero byte.
374  """
375  s = ''
376  while True:
377  # 16 is a good buffer length for ok responses; errors
378  # should be rare
379  s = s + os.read(self._rvpout, 16)
380  if s[-1] == '\0': break
381  return s[:-1]
382  def _resp(self):
383  """
384  Get a reply from RVP.
385  If an error occurs, log the message.
386  Return the current pattern value.
387  """
388  r = self._recv().split(' ', 3)
389  if r[0] == 'ok': return r[1]
390  if r[0] == 'error':
391  self._errors = True
392  if r[3] != '': # Only report if we have a message
393  line = self._parser.CurrentLineNumber
394  col = self._parser.CurrentColumnNumber
395  if line != self._prevline or col != self._prevcol:
396  # One report per file position
397  self._error_found(line, col, r[3])
398  self._prevline, self._prevcol = line, col
399  return r[1]
400  if r[0] == 'er':
401  self._errors = True
402  return r[1]
403  # Unknown response
404  raise self.RvpProtocolError, "unexpected response '"+r[0]+"'"
405 
406 
407  # Expat handlers
408  def _flush_text(self):
409  """
410  Apparently Expat doesn't concatenate text nodes, so we do it
411  manually; the CharDataHandler collects the text, and this
412  method passes it to the validator.
413  """
414  if self._text:
415  if self._ismixed:
416  self._pat = self._mixed(self._pat, self._text)
417  else:
418  self._pat = self._textonly(self._pat, self._text)
419  self._text = ''
420  def _start_element(self, name, attrs):
421  self._ismixed = True
422  self._flush_text()
423  self._pat = self._start_tag_open(self._pat, name)
424  self._ismixed = False
425  for n, v in attrs.items():
426  self._pat = self._attribute(self._pat, n, v)
427  self._pat = self._start_tag_close(self._pat, name)
428  def _end_element(self, name):
429  self._flush_text()
430  self._pat = self._end_tag(self._pat, name)
431  self._ismixed = True
432  def _characters(self, data):
433  self._text = self._text + data
434 
435 
436 
437 
438 
439 
440 ######################################################################
441 # Convenience functions #
442 ######################################################################
443 
444 def check_repo(repo_dir = '../../models/all_from_repository',
445  model_suffix = 'xml',
446  invalid_if_warnings = False,
447  compare=True):
448  """
449  Validate every model in the CellML repository, and return a list
450  of invalid models.
451 
452  If compare is False, writes errors & warnings to log files in the
453  same folder as the models, otherwise compares the output to log
454  files already present, and notes differences.
455 
456  Displays total run time.
457  """
458  def close_log_file(stream, filename):
459  stream.close()
460  try:
461  size = os.path.getsize(filename)
462  if size == 0:
463  os.remove(filename)
464  except OSError:
465  pass
466  import glob, time, gc
467  start_time = time.time()
468  v = CellMLValidator()
469  invalid = []
470  files = glob.glob(repo_dir + '/*.' + model_suffix)
471  files.sort()
472  for filename in files:
473  model = os.path.basename(filename)[:-4]
474  fn = os.path.splitext(filename)[0]
475  warnfn, errfn = fn + '_warnings.log', fn + '_errors.log'
476  if compare:
477  warn_stream = StringIO()
478  err_stream = StringIO()
479  else:
480  warn_stream = open(warnfn, 'w')
481  err_stream = open(errfn, 'w')
482  print "Checking model",model,"at",time.strftime("%X %x"),
483  sys.stdout.flush()
484  valid = v.validate(filename, error_stream=err_stream,
485  warning_stream=warn_stream,
486  invalid_if_warnings=invalid_if_warnings)
487  if not valid:
488  print max(1,4 - (5+len(model))//8) * '\t', "X"
489  else:
490  print
491  if compare:
492  compare_output_files(warn_stream, warnfn)
493  compare_output_files(err_stream, errfn)
494  else:
495  close_log_file(err_stream, errfn)
496  close_log_file(warn_stream, warnfn)
497  if not valid:
498  invalid.append(model)
499  gc.collect()
500  elapsed_time = time.time() - start_time
501  mins,secs = int(elapsed_time//60), int(elapsed_time%60)
502  print len(files),"models checked in",mins,"minutes",secs,"seconds."
503  print len(invalid),"invalid"
504  v.quit()
505  return invalid
506 
def compare_output_files(new_stream, old_filename):
    """Compare freshly generated log output with a stored log file.

    new_stream is a StringIO-like object (it must support seek, tell,
    readlines, getvalue and close) holding the new output; old_filename
    is the path of the log file from a previous run.  The two are
    compared as unordered sets of lines.

    If the old file cannot be opened but there is new output, or if the
    sets of lines differ, a message is written to standard output and
    appended to a file called 'new' in the same folder as old_filename
    (best effort), and the new output is saved as old_filename + '-new'.

    new_stream is always closed before returning, and the old log file
    is closed as soon as it has been read.
    """
    summary_path = os.path.join(os.path.dirname(old_filename), 'new')

    def save_new_output():
        """Write the complete new output next to the old log file."""
        nfp = open(old_filename + '-new', 'w')
        try:
            nfp.write(new_stream.getvalue())
        finally:
            nfp.close()

    def report(text, summary_text=None):
        """Write text to stdout, and append it to the summary file."""
        sys.stdout.write(text)
        if summary_text is None:
            summary_text = text
        try:
            ofp = open(summary_path, 'a')
            try:
                ofp.write(summary_text)
            finally:
                ofp.close()
        except IOError:
            # The summary file is a convenience only; failure to update
            # it is deliberately ignored.
            pass

    try:
        new_stream.seek(0, 2)
        new_len = new_stream.tell()
        try:
            fp = open(old_filename, 'r')
        except IOError:
            if new_len > 0:
                report("Log file %s doesn't exist, but we have new output\n"
                       % (old_filename,))
                save_new_output()
            return
        try:
            old_lines = set(fp.readlines())
        finally:
            fp.close()
        new_stream.seek(0)
        new_lines = set(new_stream.readlines())
        if old_lines != new_lines:
            text = ("Output set differs from log file %s\n"
                    "Lines added: %s\n"
                    "Lines removed: %s"
                    % (old_filename, new_lines - old_lines,
                       old_lines - new_lines))
            # Entries in the summary file are separated by a blank line
            report(text + "\n", text + " \n\n")
            save_new_output()
    finally:
        new_stream.close()
    return
549 
550 ######################################################################
551 # For running as an executable #
552 ######################################################################
553 
def get_options(args):
    """get_options(args):
    Process our command-line options.

    args is a list of options & positional arguments.

    Returns the pair (options, positional arguments).  At least one
    positional argument (an input CellML file) is required; otherwise
    the parser's error method is called.
    """
    parser = optparse.OptionParser(
        usage='usage: %prog [options] <cellml file or URI> ...',
        version="%%prog %s" % __version__)

    def flag(help_text, **extra):
        """Keyword arguments for a boolean switch that defaults to off."""
        spec = dict(action='store_true', default=False, help=help_text)
        spec.update(extra)
        return spec

    # (option strings, keyword arguments), in the order shown by --help
    option_table = [
        (('-o',),
         dict(dest='outfilename', metavar='OUTFILE',
              help='write *all* output to OUTFILE (overrides -e and -w)')),
        (('-e', '--error-file'),
         dict(dest='errfilename', metavar='ERRFILE', default='stderr',
              help='write errors to ERRFILE [default %default]')),
        (('-w', '--warning-file'),
         dict(dest='warnfilename', metavar='WARNFILE', default='stderr',
              help='write warnings to WARNFILE [default %default]')),
        (('-q', '--quiet'),
         flag("don't display any output, just set exit status",
              dest='quiet')),
        (('--no-warnings', '--only-errors'),
         dict(dest='show_warnings', action='store_false', default=True,
              help="don't display warning messages")),
        (('-s', '--space-messages'),
         flag("print a blank line after every warning/error message",
              dest='space_errors')),
        (('-x', '--xml-context'),
         flag("display the MathML tree of any expression with invalid units",
              dest='xml_context')),
        (('-u', '--warn-on-unit-conversions'),
         flag("generate a warning if unit conversions are required")),
        (('--Wu', '--warn-on-units-errors'),
         flag("give a warning instead of an error for"
              " dimensional inconsistencies",
              dest='warn_on_units_errors')),
        (('-i', '--interactive'),
         flag("use with python -i to enter an interactive session"
              " after validation")),
        (('-d', '--debug'),
         flag("output debug info to stderr")),
        (('--profile',),
         flag("turn on profiling of PyCml")),
    ]
    for option_strings, spec in option_table:
        parser.add_option(*option_strings, **spec)

    options, positional = parser.parse_args(args)
    if not positional:
        parser.error("an input CellML file must be specified")
    return options, positional
608 
609 
def run():
    """Validate all files specified on the command line.

    Options are read from sys.argv.  Each file is validated against
    CellML 1.0 in turn.  Unless --interactive was given, the process then
    exits with status 0 if every file was valid and 1 otherwise.

    With --quiet no messages are displayed at all (errors and warnings
    alike).  The output streams are closed and the validator is shut down
    even if validation raises.
    """
    options, files = get_options(sys.argv[1:])
    validator = CellMLValidator()
    try:
        # Open output streams
        if not options.outfilename is None:
            out_s = open_output_stream(options.outfilename)
            err_s = warn_s = out_s
        else:
            out_s = sys.stdout
            err_s = open_output_stream(options.errfilename)
            warn_s = open_output_stream(options.warnfilename)

        try:
            # Keyword arguments for validator
            kwargs = {# --quiet means no output at all, so it has to silence
                      # warnings as well as errors
                      'show_warnings': options.show_warnings and not options.quiet,
                      'warning_stream': warn_s,
                      'show_errors': not options.quiet,
                      'error_stream': err_s,
                      'space_errors': options.space_errors,
                      'xml_context': options.xml_context,
                      'warn_on_units_errors': options.warn_on_units_errors,
                      'check_for_units_conversions': options.warn_on_unit_conversions}

            if options.debug:
                formatter = logging.Formatter(fmt="%(name)s %(asctime)s: %(message)s")
                handler = logging.StreamHandler(sys.stderr)
                handler.setFormatter(formatter)
                handler.addFilter(OnlyDebugFilter())
                logging.getLogger().addHandler(handler)
                logging.getLogger().setLevel(logging.DEBUG)
                kwargs['loglevel'] = logging.DEBUG

            # Validate files
            result = True
            for f in files:
                if not options.quiet:
                    print >>out_s, "Validating file", f, "against CellML 1.0"
                res = validator.validate(f, **kwargs)
                if not options.quiet:
                    print >>out_s, "File is%s valid CellML 1.0" % ((' NOT','')[res])
                result = result and res
        finally:
            # Close output streams
            close_output_stream(out_s)
            close_output_stream(err_s)
            close_output_stream(warn_s)
    finally:
        # Tidy up validator
        validator.quit()

    # Set exit status
    if not options.interactive:
        sys.exit(not result)
664 
665 
if __name__ == '__main__':
    # Script entry point: run directly, or under cProfile when --profile
    # appears anywhere on the command line.
    if '--profile' not in sys.argv:
        run()
    else:
        import time, cProfile
        # Profile data goes to a file named after the current time and PID
        profile_name = '/tmp/pycml-profile-%f-%d' % (time.time(), os.getpid())
        cProfile.run('run()', profile_name)
def check_repo
Convenience functions.
Definition: validator.py:447
def amara_parse_cellml
Definition: pycml.py:191
def quit
Tell our RVP process to quit (text taken from the docstring of the commented-out __del__ method).
Definition: validator.py:288
def get_options
For running as an executable.
Definition: validator.py:554