###ruletests.py ### ###Tests for completeness and soundness against CWM from rdflib.Graph import Graph from pychinko.generator import generate from pychinko.main import Pychinko from pychinko import config from pychinko.helpers import countTriples, handleURL, FIRST, REST, TYPE, IMPLIES import pychinko.exception import getopt import os import sys import unittest import copy import time def compareOutputs(file1, file2): """I compare the NTriples outputs of file1 against file2 and vice versa. This function presumes that the output in file1 is the more reliable of the two. I return true upon success, false otherwise.""" file1 = handleURL(file1) file2 = handleURL(file2) store = Graph() file1fp = open(file1, 'r') if file1.endswith('n3'): store.parse(file1fp, format="n3") else: store.parse(file1fp) store2 = Graph() file2fp = open(file2, 'r') if file2.endswith('n3'): print 'parsing:', file2 store2.parse(file2fp, format="n3") else: store2.parse(file2fp) diff = False incompleteness = list() unsoundness = list() incomp = False usound = False for s, p, o in store: if (s, p, o) not in store2: print "incomplete: ", s, p, o diff = True incomp = True incompleteness.append((s, p, o, file2)) for s, p, o in store2: if (s, p, o) not in store: if not p in [FIRST,REST,TYPE, IMPLIES]: #ignore list-related triples print "usound: ", s, p, o diff = True usound = True unsoundness.append((s, p, o, file1)) if not diff: print "no differences." return True else: print "First store: %s" %(len(store)) print "Second store %s" %(len(store2)) if incomp: print "\n===> ", len(incompleteness), " missing facts.\n" if usound: print "\n===> ", len(unsoundness), " unsound facts.\n" return False #This is not in accordance with typical unit test idioms, but #I think it's appropriate for what we're trying to do class RuleTest(unittest.TestCase): def setUp(self): self.genericRules = dict([('rules/sameVarRules.n3', "Rules that reuse the same variables on left sides"), ('rules/onePatternRules.n3', "One-pattern rules"), ('rules/spoRules.n3', "Rules that use ?s ?p ?o")]) self.builtinRules = dict([('http://www.mindswap.org/~katz/pychinko/rules/stringBuiltins.n3', "Rules that test basic string builtins")]) #(size, randomFactsFilename) : (cwmOutputFilename, pychOutputFilename) self.randomTestFiles = {(100, 'generatedtests/testfacts.100.n3'): ('generatedtests/testoutput.cwm.100.n3', 'generatedtests/testoutput.pychinko.100.n3'), (200, 'generatedtests/testfacts.200.n3'): ('generatedtests/testoutput.cwm.200.n3', 'generatedtests/testoutput.pychinko.200.n3'), (500, 'generatedtests/testfacts.500.n3'): ('generatedtests/testoutput.cwm.500.n3', 'generatedtests/testoutput.pychinko.500.n3'), (700, 'generatedtests/testfacts.700.n3'): ('generatedtests/testoutput.cwm.700.n3', 'generatedtests/testoutput.pychinko.700.n3'), (1200, 'generatedtests/testfacts.1200.n3'): ('generatedtests/testoutput.cwm.1200.n3', 'generatedtests/testoutput.pychinko.1200.n3'), (7000, 'generatedtests/testfacts.7000.n3'): ('generatedtests/testoutput.cwm.7000.n3', 'generatedtests/testoutput.pychinko.7000.n3')} self.groundTestFiles = {('groundtests/rdfs-closure.n3', 'rules/rdfs-rules.n3'): ('generatedtests/testoutput.rdfs-closure1.cwm.n3', 'generatedtests/testoutput.rdfs-closure1.pychinko.n3'), ('rules/rdfs-facts.n3', 'rules/rdfs-rules.n3'): ('generatedtests/testoutput.rdfs-closure2.cwm.n3', 'generatedtests/testoutput.rdfs-closure2.pychinko.n3'), #builtin tests ('groundtests/smallTest.n3', 'rules/sameVarRules2.n3'): ('generatedtests/testoutput.sameVarRules2.cwm.n3', 'generatedtests/testoutput.sameVarRules2.pychinko.n3'), ('groundtests/smallTest.n3', 'rules/sameVarRules4.n3'): ('generatedtests/testoutput.sameVarRules4.cwm.n3', 'generatedtests/testoutput.sameVarRules4.pychinko.n3'), ('groundtests/smallTest.n3', 'rules/sameVarRules5.n3'): ('generatedtests/testoutput.sameVarRules5.cwm.n3', 'generatedtests/testoutput.sameVarRules5.pychinko.n3'), ('groundtests/smallTest.n3', 'rules/sameVarRules6.n3'): ('generatedtests/testoutput.sameVarRules6.cwm.n3', 'generatedtests/testoutput.sameVarRules6.pychinko.n3'), ('groundtests/smallTest.n3', 'rules/sameVarRules7.n3'): ('generatedtests/testoutput.sameVarRules7.cwm.n3', 'generatedtests/testoutput.sameVarRules7.pychinko.n3'), } #generate the random facts #for t in self.randomTestFiles: # generate(t[0], filename=t[1]) def compareAgainstCWM(self, factsFile, rulesFile, cwmOutput, pychOutput): cwmOutput = handleURL(cwmOutput) pychOutput = handleURL(pychOutput) if (os.name=='nt'): cwmCmd = "python %s %s --ntriples --think=%s --base=http://www.mindswap.org/~katz/ --purge > %s" %(config.CWM, factsFile, rulesFile, cwmOutput) else: cwmCmd = "time %s %s --ntriples --think=%s --base=http://www.mindswap.org/~katz/ --purge > %s" %(config.CWM, factsFile, rulesFile, cwmOutput) print "CWM COMMAND: ", cwmCmd t = time.time() pychinko = Pychinko() pychinko.setupAndRun(factsFile, rulesFile, pychOutput) print "Pychinko time:", time.time() - t print "CWM time: " os.system(cwmCmd) print "comparing %s and %s" %(cwmOutput, pychOutput) self.assert_(compareOutputs(cwmOutput, pychOutput)) def runRandomTests(self, testFiles, rules): for test, output in testFiles.items(): print "initial size:", test[0] for rule in rules: print "Testing ", rules[rule] self.compareAgainstCWM(test[1], rule, output[0], output[1]) def runGroundTests(self, tests): for factsRules, outputs in tests.items(): print "Testing ", factsRules[1] self.compareAgainstCWM(factsRules[0], factsRules[1], outputs[0], outputs[1]) def testAllRules(self): #self.runRandomTests(self.randomTestFiles, self.genericRules) self.runGroundTests(self.groundTestFiles) if __name__ == '__main__': unittest.main()