import sys #sys.path += ['cwm'] from sets import Set from afon import N3SyntaxError, N3Parser, N3Sink, TestN3Sink from terms import URI, Rule, Variable, Fact, Pattern, Exivar, BuiltinRule, Formula from rdflib.Literal import Literal from rdflib.URIRef import URIRef from helpers import removedups from string import lower from types import ListType import exception logimplies = ('URIRef', 'http://www.w3.org/2000/10/swap/log#implies') assertURI = ('URIRef', 'http://www.mindswap.org/~katz/pychinko/builtins#assert') def serializeTripleElt(elem): if isinstance(elem, Literal): return '"' + elem + '"' elif isinstance(elem, Exivar): return str(elem) elif isinstance(elem, URIRef): if not elem[0] == '<': return '<' + elem + '>' else: print "[", elem, "]" elif isinstance(elem, Pattern): return '{ ' + str(elem.s) + ' ' + str(elem.p) + ' ' + str(elem.o) + ' }' elif isinstance(elem, Formula): return elem.id else: return elem class PychinkoN3Sink(N3Sink): def __init__(self): self.facts = list() self.rules = list() self.formulae = dict() def startDoc(self, formula, baseURI): self.baseURI = baseURI def N3TypeToObj(self, (t, v)): """I turn an Afon-read triple element to its appropriate Pychinko object representation.""" # print "t: ", t, " v: ", v if t == 'Exivar': return Exivar(v) elif t == 'Univar': return Variable(v) elif t == 'URIRef': # encode URIs as unicode strings return URI(v) elif t == 'FORMULA': if v.endswith('RootFormula'): return Literal("RootFormula") else: return self.extractFormula((t, v)) elif t.startswith('Lit'): return Literal(v) else: raise exception.UnknownN3Type(t, v) def extractFormula(self, (t, v)): return self.formulae[(t, v)][0] def extractNestedRule(self, subj, pred, obj): if type(subj) == ListType and type(obj) == ListType: #end of nesting return Rule(subj, obj) if not isinstance(pred, URI): pred = self.N3TypeToObj(pred) self.extractNestedRule(self.N3TypeToObj(subj), pred, self.N3TypeToObj(obj)) def addFact(self, fact): """Add a fact in unserialized form""" self.facts.append(fact) def makeStatement(self, (subj, pred, obj, scp)): # Quick hack, just dealing with URIs and Variables # Plus, facts are ground # print "subj: ", subj, " pred: ", pred, " obj: ", obj, " scp: ", scp # print "formulae: ", self.formulae if scp[1].endswith('RootFormula'): if pred == assertURI: self.rules.append(BuiltinRule([Pattern('Root', 'a', 'Formula')], removedups(self.formulae[obj]), assertURI)) s = serializeTripleElt(self.N3TypeToObj(subj)) p = serializeTripleElt(self.N3TypeToObj(pred)) o = self.N3TypeToObj(obj) # could be a formula, so don't serialize # self.facts.append(Fact(s, p, o)) elif pred == logimplies: self.rules.append(Rule(removedups(self.formulae[subj]), removedups(self.formulae[obj]))) s = serializeTripleElt(self.N3TypeToObj(subj)) p = serializeTripleElt(self.N3TypeToObj(pred)) o = self.N3TypeToObj(obj) # self.facts.append(Fact(s, p, o)) else: newfact = Fact(self.N3TypeToObj(subj), self.N3TypeToObj(pred), self.N3TypeToObj(obj)) # print "newfact: ", newfact self.addFact(newfact) s = serializeTripleElt(self.N3TypeToObj(subj)) # problematic line; handling unicode for predicates p = serializeTripleElt(self.N3TypeToObj(pred)) o = serializeTripleElt(self.N3TypeToObj(obj)) # print "Serialized form: ", s, p, o # self.facts.append(Fact(s, p, o)) else: f = [] for t, v in (subj, pred, obj): if t == 'FORMULA': r = self.extractNestedRule(self.formulae[subj], pred, self.formulae[obj]) self.rules.append(r) f.append(r) else: f.append(self.N3TypeToObj((t, v))) if scp not in self.formulae: self.formulae[scp] = list() if f: self.formulae[scp].append(Pattern(f[0], f[1], f[2])) # self.formulae[scp].append(Pattern(serializeTripleElt(f[0]), serializeTripleElt(f[1]), # serializeTripleElt(f[2]))) self.formulae[scp] = removedups(self.formulae[scp]) if __name__ == "__main__": if len(sys.argv) == 1: print __doc__ elif sys.argv[1].endswith('-test'): import urllib t = '@prefix : <#> .\n:x _:y "blargh phenomic etc.\u203D" .' uri = 'data:,'+urllib.quote(t) sink = TestN3Sink() p = N3Parser(sink, uri) p.load(uri) else: sink = PychinkoN3Sink() for i in sys.argv[1:]: p = N3Parser(sink, i) p.load(i) print sink.facts print sink.rules