from sets import Set from pychinko.rete import RuleCompiler, Rete from pychinko.interpreter import Interpreter from pychinko.terms import URI, Rule, Variable, Fact, Pattern, Literal, Exivar from pychinko.pychinkafon import N3Sink, N3Parser from pychinko.builtins import Builtin, logimplies, builtinp from pychinko.nodes import RuleNode from pychinko.helpers import keysToList, removedups, bindingsp class QueryN3Sink(N3Sink): def __init__(self): self.facts = list() self.rules = list() self.formulae = dict() def makeStatement(self, (subj, pred, obj, scp)): # Quick hack, just dealing with URIs and Variables # Plus, facts are ground # No nested rules if scp[1].endswith('RootFormula'): f = [] if pred != logimplies: for t, v in (subj, pred, obj): if t == 'Exivar': f.append(Exivar(v)) if t == 'Univar': f.append(Variable(v)) if t == 'URIRef': f.append(URI(v)) if t == 'Lit': f.append(Literal(v)) self.facts.append(Pattern(f[0], f[1], f[2])) class SimpleQueryEvaluator: def __init__(self, store): self.store = store self.joinedBetaNodes = Set() def queryFromString(self, queryString): return self.query(removedups(self.parseN3(queryString))) def queryFromFile(self, queryFile): fp = open(queryFile, 'r') result = self.query(removedups(self.parseN3(fp.read()))) fp.close() return result def query(self, query): compiler = RuleCompiler() self.rete = compiler.compile([Rule(query, [])]) self.results = list() #Should be no dups self.runQuery() return self.results def runQuery(self): # New eval round self.joinedBetaNodes = Set() # Initialize alphas for alphaNode in self.rete.alphaNodeStore: alphaNode.addAll(self.store.triples(alphaNode.pattern.noneBasedPattern())) # Run joins for alphaNode in self.rete.alphaNodeStore: for betaNode in alphaNode.betaNodes: self.processBetaNode(betaNode) def processBetaNode(self, betaNode): """I process a beta node""" if betaNode not in self.joinedBetaNodes: inferences = betaNode.join(useBindings=True) self.joinedBetaNodes.add(betaNode) if inferences: if betaNode.rule: for i in inferences: if bindingsp(i): #it's a set of bindings returned by join #(this will be after a builtin was evaluated) self.results.append(i) else: #it's a list, so match up the variables (get a binding) self.results.append(betaNode.getbindings(i)) # for i in inferences: # self.results.append(i) else: # There will only be more than one child if # there are shared nodes and muliple query results for child in betaNode.children: #process children of BetaNode.. #pop(0) returns first element (FIFO) self.processBetaNode(child) def parseN3(self, queryString): sink = QueryN3Sink() parser = N3Parser(sink, '') parser.startDoc() parser.feed(queryString) parser.endDoc() return sink.facts if __name__ == "__main__": from rdflib.TripleStore import TripleStore import time t = time.time() store = TripleStore() # for fn in ['2004-07-29.rdf']: for fn in ['ruletests/tests/rdfig/test.rdf']: # for fn in ['2004-07-29-small.rdf']: store.load(fn) #import glob #for fn in glob.glob('rdfiglogs/*'): # store.load(fn) print 'Time to load:',time.time() - t print 'Size of store:',len(store) #print 'hits',len(list(store.triples((None, URI('http://purl.org/dc/elements/1.1/date'), None)))) t = time.time() querier = SimpleQueryEvaluator(store) Query = """ @prefix wn: . @prefix foaf: . @prefix dc: . @prefix string: . @prefix math: . ?y dc:date ?a. ?x foaf:nick ?z. ?y dc:creator ?x. ?x dc:age ?g. ?z string:contains "i". ?g math:greaterThan "28". ?g math:lessThan "30". """ # Query = """ # @prefix wn: . # @prefix foaf: . # @prefix dc: . # @prefix string: . # _:y dc:date _:a. # _:y dc:creator _:x. # _:x foaf:nick _:z. # _:z string:contains "j". # _:z string:contains "b". # _:z string:contains "i". # """ withProfile = False if withProfile: import hotshot, hotshot.stats prof = hotshot.Profile("pychinko.prof") benchtime = prof.runcall(querier.queryFromString, Query) stats = hotshot.stats.load("pychinko.prof") stats.strip_dirs() stats.sort_stats('time', 'calls') stats.print_stats(23) prof.close() else: rows = querier.queryFromString(Query) print 'Time to query:', time.time() - t #print rows[0:10] for row in rows: print row print 'Number of hits:', len(rows)