Package RDFClosure :: Module RestrictedDatatype
[hide private]
[frames] | no frames]

Source Code for Module RDFClosure.RestrictedDatatype

  1  # -*- coding: utf-8 -*- 
  2  # 
  3  """ 
  4  Module to handle datatype restrictions, ie, data ranges. 
  5    
  6  The module implements the following aspects of datatype restrictions: 
  7   
  8   - a new datatype is created run-time and added to the allowed and accepted datatypes; literals are checked whether they abide to the restrictions 
  9   - the new datatype is defined to be a 'subClass' of the restricted datatype 
 10   - literals of the restricted datatype and that abide to the restrictions defined by the facets are also assigned to be of the new type 
 11    
 12  The last item is important to handle the following structures:: 
 13   ex:RE a owl:Restriction ; 
 14          owl:onProperty ex:p ; 
 15          owl:someValuesFrom [ 
 16                  a rdfs:Datatype ; 
 17                  owl:onDatatype xsd:string ; 
 18                  owl:withRestrictions ( 
 19                          [ xsd:minLength "3"^^xsd:integer ] 
 20                          [ xsd:maxLength "6"^^xsd:integer ] 
 21                  ) 
 22          ] 
 23   . 
 24   ex:q ex:p "abcd"^^xsd:string. 
 25  In the case above the system can then infer that C{ex:q} is also of type C{ex:RE}. 
 26   
 27  Datatype restrictions are used by the L{OWL RL Extensions<OWLRLExtras.OWLRL_Extension>} extension class. 
 28   
 29  The implementation is not 100% complete. Some things that an ideal implementation should do are not done yet like: 
 30   
 31   - checking whether a facet is of a datatype that is allowed for that facet 
 32   - handling of non-literals in the facets (ie, if the resource is defined to be of type literal, but whose value 
 33   is defined via a separate 'owl:sameAs' somewhere else) 
 34   
 35  @requires: U{RDFLib<https://github.com/RDFLib/rdflib>}, 4.0.0 and higher 
 36  @license: This software is available for use under the U{W3C Software License<http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231>} 
 37  @organization: U{World Wide Web Consortium<http://www.w3.org>} 
 38  @author: U{Ivan Herman<a href="http://www.w3.org/People/Ivan/">} 
 39   
 40  """ 
 41   
 42  __author__  = 'Ivan Herman' 
 43  __contact__ = 'Ivan Herman, ivan@w3.org' 
 44  __license__ = u'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231' 
 45   
 46  import re 
 47   
 48  from OWL import * 
 49  # noinspection PyPep8Naming,PyPep8Naming 
 50  from OWL import OWLNS as ns_owl 
 51  from RDFClosure.RDFS import Datatype 
 52  from RDFClosure.RDFS import type 
 53  # noinspection PyPep8Naming 
 54  from RDFClosure.RDFS import RDFNS as ns_rdf 
 55   
 56  from rdflib     import Literal as rdflibLiteral 
 57  # noinspection PyPep8Naming 
 58  from rdflib.namespace import XSD as ns_xsd 
 59   
 60  from DatatypeHandling import AltXSDToPYTHON 
 61   
 62  #: Constant for datatypes using min, max (inclusive and exclusive): 
 63  MIN_MAX                                 = 0 
 64  #: Constant for datatypes using length, minLength, and maxLength (and nothing else) 
 65  LENGTH                                  = 1 
 66  #: Constant for datatypes using length, minLength, maxLength, and pattern 
 67  LENGTH_AND_PATTERN              = 2 
 68  #: Constant for datatypes using length, minLength, maxLength, pattern, and lang range 
 69  LENGTH_PATTERN_LRANGE   = 3 
 70   
 71  #: Dictionary of all the datatypes, keyed by category 
 72  Datatypes_per_facets = { 
 73          MIN_MAX                                 : [ns_owl["rational"], ns_xsd["decimal"], ns_xsd["integer"], 
 74                                                                  ns_xsd["nonNegativeInteger"], ns_xsd["nonPositiveInteger"], 
 75                                                                  ns_xsd["positiveInteger"], ns_xsd["negativeInteger"], 
 76                                                                  ns_xsd["long"], ns_xsd["short"], ns_xsd["byte"], 
 77                                                                  ns_xsd["unsignedLong"], ns_xsd["unsignedInt"], ns_xsd["unsignedShort"], ns_xsd["unsignedByte"], 
 78                                                                  ns_xsd["double"], ns_xsd["float"], 
 79                                                                  ns_xsd["dateTime"], ns_xsd["dateTimeStamp"], ns_xsd["time"], ns_xsd["date"] 
 80                                                            ], 
 81          LENGTH                                  : [ns_xsd["hexBinary"], ns_xsd["base64Binary"]], 
 82          LENGTH_AND_PATTERN              : [ns_xsd["anyURI"], ns_xsd["string"], ns_xsd["NMTOKEN"], ns_xsd["Name"], ns_xsd["NCName"], 
 83                                                                  ns_xsd["language"], ns_xsd["normalizedString"] 
 84                                                            ], 
 85          LENGTH_PATTERN_LRANGE   : [ns_rdf["plainLiteral"]] 
 86  } 
 87   
 88  #: a simple list containing C{all} datatypes that may have a facet 
 89  facetable_datatypes = reduce(lambda x, y: x + y, Datatypes_per_facets.values()) 
 90   
 91  ####################################################################################################### 
 92   
def _lit_to_value(dt, v):
    """
    Convert a literal's string form to a Python value and verify it against the facets of a
    restricted datatype. If the conversion or the facet check fails, an exception is raised
    (and is expected to be caught higher up to generate an error message).

    The function plays the same role as the converters of the L{DatatypeHandling} module; it is
    registered run time, as new restricted datatypes are discovered, through the
    C{lambda v: _lit_to_value(self, v)} setting done inside a L{RestrictedDatatype} instance.

    @param dt: faceted datatype
    @type dt: L{RestrictedDatatype}
    @param v: literal to be converted and checked
    @return: the Python value produced by the converter of the base datatype
    @raise ValueError: invalid literal value
    """
    # The converter of the base datatype may itself raise an exception on a malformed literal.
    converted = dt.converter(v)

    # The facets only have to be verified if the base datatype belongs to one of the
    # facet categories.
    for members in Datatypes_per_facets.values():
        if dt.base_type not in members:
            continue
        if dt.checkValue(converted):
            continue
        raise ValueError("Literal value %s does not fit the faceted datatype %s" % (v,dt))

    # No facet was violated
    return converted
121 122 # noinspection PyPep8Naming,PyShadowingBuiltins
123 -def _lang_range_check(range, lang):
124 """ 125 Implementation of the extended filtering algorithm, as defined in point 3.3.2, 126 of U{RFC 4647<http://www.rfc-editor.org/rfc/rfc4647.txt>}, on matching language ranges and language tags. 127 Needed to handle the C{rdf:PlainLiteral} datatype. 128 @param range: language range 129 @param lang: language tag 130 @rtype: boolean 131 """ 132 def _match(r, l): 133 """Matching of a range and language item: either range is a wildcard or the two are equal 134 @param r: language range item 135 @param l: language tag item 136 @rtype: boolean 137 """ 138 return r == '*' or r == l
139 140 rangeList = range.strip().lower().split('-') 141 langList = lang.strip().lower().split('-') 142 if not _match(rangeList[0], langList[0]): 143 return False 144 145 rI = 1 146 rL = 1 147 while rI < len(rangeList) : 148 if rangeList[rI] == '*' : 149 rI += 1 150 continue 151 if rL >= len(langList) : 152 return False 153 if _match(rangeList[rI], langList[rL]) : 154 rI += 1 155 rL += 1 156 continue 157 if len(langList[rL]) == 1 : 158 return False 159 else : 160 rL += 1 161 continue 162 return True 163 164 ####################################################################################################### 165
def extract_faceted_datatypes(core, graph):
    """
    Extractions of restricted (ie, faceted) datatypes from the graph.

    Every resource typed as a datatype is examined. A L{RestrictedDatatype} is created for it if it has
    exactly one base datatype (C{onDatatype}) that may carry facets and exactly one facet list
    (C{withRestrictions}). Resources without a base datatype, with a base datatype that cannot carry
    facets, or without a facet list are silently skipped. Any problem (several base datatypes, several
    facet lists, a facet value that cannot be converted, a datatype that cannot be constructed) is
    reported through C{core.add_error}; a faulty facet value only drops that facet, the other problems
    drop the datatype.

    @param core: the core closure instance that is being handled
    @type core: L{Closure.Core}
    @param graph: RDFLib graph
    @return: array of L{RestrictedDatatype} instances
    """
    retval = []
    for dtype in graph.subjects(type, Datatype):
        try:
            base_types = list(graph.objects(dtype, onDatatype))
            if not base_types:
                continue
            if len(base_types) > 1:
                raise Exception("Several base datatype for the same restriction %s" % dtype)
            base_type = base_types[0]
            if base_type not in facetable_datatypes:
                continue

            rlists = list(graph.objects(dtype, withRestrictions))
            if not rlists:
                continue
            if len(rlists) > 1:
                raise Exception("More than one facet lists for the same restriction %s" % dtype)

            final_facets = []
            for r in graph.items(rlists[0]):
                for (facet, lit) in graph.predicate_objects(r):
                    if not isinstance(lit, rdflibLiteral):
                        continue
                    # The python value of the literal should be extracted. The conversion may
                    # raise an exception; in that case the error is recorded and only this
                    # facet is dropped.
                    try:
                        if lit.datatype is None or lit.datatype == ns_xsd["string"]:
                            final_facets.append((facet, str(lit)))
                        else:
                            final_facets.append((facet, AltXSDToPYTHON[lit.datatype](str(lit))))
                    except Exception as msg:
                        core.add_error(msg)

            # We do have everything we need:
            retval.append(RestrictedDatatype(dtype, base_type, final_facets))
        except Exception as msg:
            # Record the problem and carry on with the next datatype
            core.add_error(msg)
    return retval
217 218 219 # noinspection PyPep8Naming
class RestrictedDatatypeCore:
    """An 'abstract' superclass for datatype restrictions. The instance variables listed here are
    used in general, without the specificities of the concrete restricted datatype.

    This module defines the L{RestrictedDatatype} class that corresponds to the datatypes and their restrictions
    defined in the OWL 2 standard. Other modules may subclass this class to define new datatypes with restrictions.

    @ivar datatype : the URI for this datatype
    @ivar base_type : URI of the datatype that is restricted
    @ivar toPython : function to convert a Literal of the specified type to a Python value.
    """

    def __init__(self, type_uri, base_type):
        """
        @param type_uri: URI of the datatype being defined
        @param base_type: URI of the base datatype, ie, the one being restricted
        """
        self.datatype = type_uri
        self.base_type = base_type
        # No conversion function is known at this level; it is initialized to None
        self.toPython = None

    def checkValue(self, value):
        """
        Check whether the (python) value abides to the constraints defined by the current facets.
        This version is only a placeholder that subclasses have to override.

        @param value: the value to be checked
        @rtype: boolean
        @raise NotImplementedError: always (it is a subclass of C{Exception}, so that code catching
        C{Exception} continues to work)
        """
        raise NotImplementedError("This class should not be used by itself, only via its subclasses!")
242 243 # noinspection PyPep8Naming
class RestrictedDatatype(RestrictedDatatypeCore):
    """
    Implementation of a datatype with facets, ie, datatype with restrictions.

    @ivar datatype : the URI for this datatype
    @ivar base_type : URI of the datatype that is restricted
    @ivar converter : method to convert a literal of the base type to a Python value (drawn from L{DatatypeHandling.AltXSDToPYTHON})
    @ivar minExclusive : value for the C{xsd:minExclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar minInclusive : value for the C{xsd:minInclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxExclusive : value for the C{xsd:maxExclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxInclusive : value for the C{xsd:maxInclusive} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar minLength : value for the C{xsd:minLength} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar maxLength : value for the C{xsd:maxLength} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar length : value for the C{xsd:length} facet, initialized to C{None} and set to the right value if a facet is around
    @ivar pattern : array of compiled patterns for the C{xsd:pattern} facet (each anchored to match a whole value), initialized to C{[]} and set to the right value if a facet is around
    @ivar langRange : array of language ranges for the C{rdf:langRange} facet, initialized to C{[]} and set to the right value if a facet is around
    @ivar check_methods : list of class methods that are relevant for the given C{base_type}
    @ivar toPython : function to convert a Literal of the specified type to a Python value. Is defined by C{lambda v : _lit_to_value(self, v)}, see L{_lit_to_value}
    """

    def __init__(self, type_uri, base_type, facets):
        """
        @param type_uri: URI of the datatype being defined
        @param base_type: URI of the base datatype, ie, the one being restricted
        @param facets : array of C{(facetURI, value)} pairs
        @raise Exception: if there is no converter for the base datatype
        """
        RestrictedDatatypeCore.__init__(self, type_uri, base_type)
        if self.base_type not in AltXSDToPYTHON:
            raise Exception("No facet is implemented for datatype %s" % self.base_type)
        self.converter = AltXSDToPYTHON[self.base_type]

        self.minExclusive = None
        self.maxExclusive = None
        self.minInclusive = None
        self.maxInclusive = None
        self.length = None
        self.maxLength = None
        self.minLength = None
        self.pattern = []
        self.langRange = []

        # If the same min/max style facet appears several times, the most restrictive value is kept
        for (facet, value) in facets:
            if facet == ns_xsd["minInclusive"] and (self.minInclusive is None or self.minInclusive < value):
                self.minInclusive = value
            elif facet == ns_xsd["minExclusive"] and (self.minExclusive is None or self.minExclusive < value):
                self.minExclusive = value
            elif facet == ns_xsd["maxInclusive"] and (self.maxInclusive is None or value < self.maxInclusive):
                self.maxInclusive = value
            elif facet == ns_xsd["maxExclusive"] and (self.maxExclusive is None or value < self.maxExclusive):
                self.maxExclusive = value
            elif facet == ns_rdf["langRange"]:
                self.langRange.append(value)
            elif facet == ns_xsd["length"]:
                self.length = value
            elif facet == ns_xsd["maxLength"] and (self.maxLength is None or value < self.maxLength):
                self.maxLength = value
            elif facet == ns_xsd["minLength"] and (self.minLength is None or value > self.minLength):
                self.minLength = value
            elif facet == ns_xsd["pattern"]:
                # XML Schema patterns are implicitly anchored at both ends, ie, they have to match
                # the whole value. The expression is wrapped accordingly, so that a 'match' call
                # succeeds only if the complete string is consumed.
                self.pattern.append(re.compile(r"(?:%s)\Z" % value))

        # Choose the methods that are relevant for this datatype, based on the base type
        length_checks = [RestrictedDatatype._check_min_length, RestrictedDatatype._check_max_length,
                         RestrictedDatatype._check_length]
        facet_to_method = {
            MIN_MAX: [RestrictedDatatype._check_max_exclusive, RestrictedDatatype._check_min_exclusive,
                      RestrictedDatatype._check_max_inclusive, RestrictedDatatype._check_min_inclusive],
            LENGTH: length_checks,
            LENGTH_AND_PATTERN: length_checks + [RestrictedDatatype._check_pattern],
            # This category is defined as "length, minLength, maxLength, pattern, and lang range",
            # so the pattern check belongs here, too
            LENGTH_PATTERN_LRANGE: length_checks + [RestrictedDatatype._check_pattern,
                                                    RestrictedDatatype._check_lang_range],
        }
        self.check_methods = []
        for cat in Datatypes_per_facets.keys():
            if self.base_type in Datatypes_per_facets[cat]:
                self.check_methods = facet_to_method[cat]
                break
        self.toPython = lambda v: _lit_to_value(self, v)

    @staticmethod
    def _lexical(value):
        """
        Return the string form of an RDFLib literal; any other value is returned unchanged.
        @param value: the value to be checked
        """
        if isinstance(value, rdflibLiteral):
            return str(value)
        return value

    def checkValue(self, value):
        """
        Check whether the (python) value abides to the constraints defined by the current facets.
        @param value: the value to be checked
        @rtype: boolean
        """
        # The entries of check_methods are taken from the class, hence the explicit 'self'
        return all(method(self, value) for method in self.check_methods)

    def _check_min_exclusive(self, value):
        """
        Check the (python) value against min exclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minExclusive is None:
            return True
        return self.minExclusive < value

    def _check_min_inclusive(self, value):
        """
        Check the (python) value against min inclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minInclusive is None:
            return True
        return self.minInclusive <= value

    def _check_max_exclusive(self, value):
        """
        Check the (python) value against max exclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxExclusive is None:
            return True
        return value < self.maxExclusive

    def _check_max_inclusive(self, value):
        """
        Check the (python) value against max inclusive facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxInclusive is None:
            return True
        return value <= self.maxInclusive

    def _check_min_length(self, value):
        """
        Check the (python) value against minimum length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.minLength is None:
            return True
        return self.minLength <= len(self._lexical(value))

    def _check_max_length(self, value):
        """
        Check the (python) value against maximum length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.maxLength is None:
            return True
        return self.maxLength >= len(self._lexical(value))

    def _check_length(self, value):
        """
        Check the (python) value against exact length facet.
        @param value: the value to be checked
        @rtype: boolean
        """
        if self.length is None:
            return True
        return self.length == len(self._lexical(value))

    def _check_pattern(self, value):
        """
        Check the (python) value against array of regular expressions. The value has to match
        all of them, and each of them as a whole.
        @param value: the value to be checked
        @rtype: boolean
        """
        if not self.pattern:
            return True
        val = self._lexical(value)
        return all(p.match(val) is not None for p in self.pattern)

    def _check_lang_range(self, value):
        """
        Check the (python) value against array of language ranges. If there are language ranges,
        only an RDFLib literal with a language tag matching all of them is accepted.
        @param value: the value to be checked
        @rtype: boolean
        """
        if not self.langRange:
            # No language range facet is around: there is nothing to check
            return True
        if not isinstance(value, rdflibLiteral):
            # The language of a non-literal value is not known
            return False
        lang = value.language
        if lang is None:
            # A literal without a language tag cannot match a language range
            return False
        return all(_lang_range_check(r, lang) for r in self.langRange)
451