from sets import Set
from pychinko.rete import RuleCompiler, Rete
from pychinko.interpreter import Interpreter
from pychinko.terms import URI, Rule, Variable, Fact, Pattern, Literal, Exivar
from pychinko.pychinkafon import N3Sink, N3Parser
from pychinko.builtins import Builtin, logimplies, builtinp
from pychinko.nodes import RuleNode
from pychinko.helpers import keysToList, removedups, bindingsp
class QueryN3Sink(N3Sink):
    """N3 parser sink that collects the statements of a query as Patterns.

    Each statement reported for the root formula whose predicate differs
    from ``logimplies`` is appended to ``self.facts`` as a ``Pattern``.
    ``self.rules`` and ``self.formulae`` are created empty and are never
    filled in by this class.
    """

    # Term-type tag (as delivered to makeStatement) -> class used to build
    # the corresponding term from the tag's value.
    _TERM_BUILDERS = {
        'Exivar': Exivar,
        'Univar': Variable,
        'URIRef': URI,
        'Lit': Literal,
    }

    def __init__(self):
        self.facts = list()
        self.rules = list()
        self.formulae = dict()

    def makeStatement(self, statement):
        """Record one parsed statement as a Pattern in ``self.facts``.

        statement -- a ``(subj, pred, obj, scp)`` 4-tuple. ``subj``,
            ``pred`` and ``obj`` are each a ``(type tag, value)`` pair;
            the statement is only recorded when ``scp[1]`` ends with
            ``'RootFormula'``.

        Quick hack: only the tags 'Exivar', 'Univar', 'URIRef' and 'Lit'
        are handled, and nested rules are not supported.  A term with any
        other tag is skipped, so a statement containing one produces fewer
        than three terms and building the Pattern raises IndexError.
        """
        # Unpacked here rather than in the signature: tuple parameters are
        # Python-2-only syntax, and callers still pass a single 4-tuple.
        subj, pred, obj, scp = statement
        if scp[1].endswith('RootFormula') and pred != logimplies:
            terms = []
            for tag, value in (subj, pred, obj):
                builder = self._TERM_BUILDERS.get(tag)
                if builder is not None:
                    terms.append(builder(value))
            self.facts.append(Pattern(terms[0], terms[1], terms[2]))
class SimpleQueryEvaluator:
    """Answer a conjunctive N3 query against a triple store.

    The query patterns are compiled into a Rete network as the left-hand
    side of a rule with an empty right-hand side; the alpha nodes are
    filled from the store and the beta nodes are joined to produce the
    result rows.

    store -- object providing ``triples(pattern)``; it is queried once per
        alpha node on every evaluation.
    """

    def __init__(self, store):
        self.store = store
        # Beta nodes already joined during the current evaluation round.
        self.joinedBetaNodes = Set()

    def queryFromString(self, queryString):
        """Parse the N3 text ``queryString`` and return the query results."""
        return self.query(removedups(self.parseN3(queryString)))

    def queryFromFile(self, queryFile):
        """Read N3 query text from the path ``queryFile`` and evaluate it.

        The file is closed even when reading fails.
        """
        fp = open(queryFile, 'r')
        try:
            queryString = fp.read()
        finally:
            fp.close()
        return self.queryFromString(queryString)

    def query(self, query):
        """Evaluate a list of patterns and return the list of results.

        The compiled network and the results are also kept on the instance
        as ``self.rete`` and ``self.results``.
        """
        compiler = RuleCompiler()
        # The query is the left-hand side of a rule that concludes nothing.
        self.rete = compiler.compile([Rule(query, [])])
        self.results = list()  # Should be no dups
        self.runQuery()
        return self.results

    def runQuery(self):
        """Fill the alpha nodes from the store, then run all the joins."""
        # New eval round
        self.joinedBetaNodes = Set()
        # Initialize alphas: every alpha node is loaded before any join runs.
        for alphaNode in self.rete.alphaNodeStore:
            alphaNode.addAll(
                self.store.triples(alphaNode.pattern.noneBasedPattern()))
        # Run joins
        for alphaNode in self.rete.alphaNodeStore:
            for betaNode in alphaNode.betaNodes:
                self.processBetaNode(betaNode)

    def processBetaNode(self, betaNode):
        """Join ``betaNode`` once per round and collect or propagate results.

        A node that has a rule attached contributes its inferences to
        ``self.results``; any other node with inferences hands over to its
        children.  Nodes already joined in this round are skipped.
        """
        if betaNode in self.joinedBetaNodes:
            return
        inferences = betaNode.join(useBindings=True)
        self.joinedBetaNodes.add(betaNode)
        if not inferences:
            return
        if betaNode.rule:
            for inference in inferences:
                if bindingsp(inference):
                    # It's a set of bindings returned by join
                    # (this will be after a builtin was evaluated).
                    self.results.append(inference)
                else:
                    # It's a list, so match up the variables (get a binding).
                    self.results.append(betaNode.getbindings(inference))
        else:
            # There will only be more than one child if there are shared
            # nodes and multiple query results.
            for child in betaNode.children:
                self.processBetaNode(child)

    def parseN3(self, queryString):
        """Parse N3 text and return the Patterns collected by the sink."""
        sink = QueryN3Sink()
        parser = N3Parser(sink, '')
        parser.startDoc()
        parser.feed(queryString)
        parser.endDoc()
        return sink.facts
if __name__ == "__main__":
    # Demo / benchmark: load an RDF file into a TripleStore, run one N3
    # query against it and print the timings and the result rows.
    from rdflib.TripleStore import TripleStore
    import time

    t = time.time()
    store = TripleStore()
    for fn in ['ruletests/tests/rdfig/test.rdf']:
        store.load(fn)
    # A single pre-formatted argument prints the same text as the original
    # comma-separated print statements.
    print('Time to load: %s' % (time.time() - t))
    print('Size of store: %s' % len(store))

    t = time.time()
    querier = SimpleQueryEvaluator(store)
    # NOTE(review): the <IRI> part of every @prefix line was missing, which
    # made the N3 unparsable.  The IRIs below are the standard FOAF, Dublin
    # Core and SWAP string/math namespaces; wn: is not used by the query,
    # so please confirm its IRI.
    Query = """
@prefix wn: <http://xmlns.com/wordnet/1.6/>.
@prefix foaf: <http://xmlns.com/foaf/0.1/>.
@prefix dc: <http://purl.org/dc/elements/1.1/>.
@prefix string: <http://www.w3.org/2000/10/swap/string#>.
@prefix math: <http://www.w3.org/2000/10/swap/math#>.
?y dc:date ?a.
?x foaf:nick ?z.
?y dc:creator ?x.
?x dc:age ?g.
?z string:contains "i".
?g math:greaterThan "28".
?g math:lessThan "30".
"""
    # Alternative query using blank nodes instead of ?variables:
    # Query = """
    # @prefix wn: <http://xmlns.com/wordnet/1.6/>.
    # @prefix foaf: <http://xmlns.com/foaf/0.1/>.
    # @prefix dc: <http://purl.org/dc/elements/1.1/>.
    # @prefix string: <http://www.w3.org/2000/10/swap/string#>.
    # _:y dc:date _:a.
    # _:y dc:creator _:x.
    # _:x foaf:nick _:z.
    # _:z string:contains "j".
    # _:z string:contains "b".
    # _:z string:contains "i".
    # """

    withProfile = False
    if withProfile:
        import hotshot
        import hotshot.stats
        prof = hotshot.Profile("pychinko.prof")
        # runcall() hands back the profiled function's return value, i.e.
        # the result rows printed below.
        rows = prof.runcall(querier.queryFromString, Query)
        # Close the log before loading it so the statistics are complete.
        prof.close()
        stats = hotshot.stats.load("pychinko.prof")
        stats.strip_dirs()
        stats.sort_stats('time', 'calls')
        stats.print_stats(23)
    else:
        rows = querier.queryFromString(Query)
        print('Time to query: %s' % (time.time() - t))

    for row in rows:
        print(row)
    print('Number of hits: %s' % len(rows))