91 строка
2.7 KiB
Python
91 строка
2.7 KiB
Python
from rdflib.graph import Graph
|
|
from rdflib import URIRef
|
|
from StringIO import StringIO
|
|
|
|
class RDFParser(object):
    """Load and parse an RDF/XML document and offer simple query helpers.

    Attributes:
        rdf: The parsed rdflib ``Graph``, or ``None`` if parsing failed.
        namespace: Default namespace used by ``uri`` to build references.
    """

    # Namespace used when the caller does not supply one.
    DEFAULT_NAMESPACE = "http://www.mozilla.org/2004/em-rdf"

    def __init__(self, data, namespace=None):
        """Parse ``data`` as RDF/XML.

        ``data`` may be a string holding the document or a file-like
        object (anything exposing a ``read`` method).

        On success the parsed graph is stored in ``self.rdf``; if the
        parser raises, ``self.rdf`` is set to ``None`` instead.
        ``self.namespace`` is set in both cases, so ``uri`` stays usable
        even after a failed parse.
        """
        # Assign the namespace first so a failed parse does not leave the
        # instance half-initialised.
        if namespace is None:
            namespace = self.DEFAULT_NAMESPACE
        self.namespace = namespace

        # Wrap raw string data in a pseudo-file; pass file-likes through.
        if hasattr(data, "read"):
            pseudo_file = data
        else:
            pseudo_file = StringIO(data)

        graph = Graph()
        try:
            graph.parse(pseudo_file, format="xml")
        except Exception:
            # Deliberately best-effort: any parse failure is reported to
            # the caller as ``rdf is None`` rather than as an exception.
            self.rdf = None
        else:
            self.rdf = graph

    def uri(self, element, namespace=None):
        """Return a URIRef of the form ``<namespace>#<element>``.

        Falls back to ``self.namespace`` when ``namespace`` is None.
        """
        if namespace is None:
            namespace = self.namespace

        return URIRef("%s#%s" % (namespace, element))

    def get_root_subject(self):
        """Return the topmost subject of the graph.

        Starting from the first subject the graph yields, repeatedly step
        to the first subject that has the current node as its object,
        until a node is reached that is nobody's object.

        Returns ``None`` when there is no parsed graph or the graph has
        no subjects. If the climb runs into a node it has already
        visited (a cycle), the last node reached is returned rather than
        looping forever.
        """
        if self.rdf is None:
            return None

        target = next(iter(self.rdf.subjects()), None)
        if target is None:
            return None

        # Remember visited nodes so cyclic graphs cannot trap the loop.
        seen = set([target])
        while True:
            parent = next(iter(self.rdf.subjects(None, target)), None)
            if parent is None or parent in seen:
                return target
            seen.add(parent)
            target = parent

    def get_object(self, subject=None, predicate=None):
        """Return the first object matching ``subject`` and ``predicate``.

        Either argument may be a URIRef (as built by ``uri``), a node
        previously returned by this method, or None to match anything.
        Returns ``None`` when nothing matches or no graph was parsed.
        """
        if self.rdf is None:
            return None

        # Only the first match is needed, so don't materialise the rest.
        return next(iter(self.rdf.objects(subject, predicate)), None)

    def get_objects(self, subject=None, predicate=None):
        """Return a list of every object matching the query.

        Takes the same arguments as ``get_object``. Returns an empty
        list when nothing matches or no graph was parsed.
        """
        if self.rdf is None:
            return []

        return list(self.rdf.objects(subject, predicate))
|