Source code for examples.berkeleydb_example
"""
BerkeleyDB in use as a persistent Graph store.
Example 1: simple actions
* creating a ConjunctiveGraph using the BerkeleyDB Store
* adding triples to it
* counting them
* closing the store, emptying the graph
* re-opening the store using the same DB files
* getting the same count of triples as before
Example 2: larger data
* loads multiple graphs downloaded from GitHub into a BerkeleyDB-backed graph stored in the folder gsq_vocabs.
* does not delete the DB at the end so you can see it on disk
"""
import os
import tempfile
from rdflib import ConjunctiveGraph, Literal, Namespace
from rdflib.plugins.stores.berkeleydb import has_bsddb
from rdflib.store import NO_STORE, VALID_STORE
def example_1():
    """Create a BerkeleyDB-backed ConjunctiveGraph and run some basic tasks on it.

    The store is placed inside a fresh temporary directory, so every run starts
    without an existing database. The function:

    * tries to open the store without creating it and creates it when
      ``NO_STORE`` is returned,
    * adds two triples, commits, and prints the triple count and a
      serialization of the graph,
    * closes the store, re-opens it from the same files and prints the count
      again,
    * removes the temporary directory, also when an earlier step raised.

    Raises:
        AssertionError: if opening an existing store returns anything other
            than ``VALID_STORE``.
    """
    # The TemporaryDirectory is a unique parent folder that is removed on exit.
    # The store path inside it deliberately does not exist yet, so the first
    # open() below goes through the "create" branch.
    with tempfile.TemporaryDirectory() as tmp_dir:
        path = os.path.join(tmp_dir, "berkeleydb_store")

        # Declare we are using a BerkeleyDB Store
        graph = ConjunctiveGraph("BerkeleyDB")

        # Open previously created store, or create it if it doesn't exist yet
        # (always doesn't exist in this example as using temp file location)
        rt = graph.open(path, create=False)

        if rt == NO_STORE:
            # There is no underlying BerkeleyDB infrastructure, so create it
            print("Creating new DB")
            graph.open(path, create=True)
        else:
            print("Using existing DB")
            assert rt == VALID_STORE, "The underlying store is corrupt"

        try:
            print("Triples in graph before add:", len(graph))
            print("(will always be 0 when using temp file for DB)")

            # Now we'll add some triples to the graph & commit the changes
            EG = Namespace("http://example.net/test/")
            graph.bind("eg", EG)

            graph.add((EG["pic:1"], EG.name, Literal("Jane & Bob")))
            graph.add((EG["pic:2"], EG.name, Literal("Squirrel in Tree")))

            graph.commit()

            print("Triples in graph after add:", len(graph))
            print("(should be 2)")

            # display the graph in Turtle
            print(graph.serialize())
        finally:
            # close when done, otherwise BerkeleyDB will leak lock entries.
            graph.close()

        # reopen the graph from the same DB files
        graph = ConjunctiveGraph("BerkeleyDB")
        graph.open(path, create=False)
        try:
            print("Triples still in graph:", len(graph))
            print("(should still be 2)")
        finally:
            graph.close()

    # Leaving the ``with`` block removes the temp folder together with the
    # BerkeleyDB database files inside it.
def example_2():
    """Loads a number of SKOS vocabularies from GitHub into a BerkeleyDB-backed graph stored in the local folder
    'gsq_vocabs'

    Should print out the number of triples after each load, e.g.:
        177
        248
        289
        379
        421
        628
        764
        813
        965
        1381
        9666
        9719
        ...

    The database folder is not deleted afterwards, but the store is always
    closed before returning.

    Returns:
        ``None`` when loading completes. If the request for the GitHub tree
        listing fails with an ``HTTPError``, the tuple
        ``(status_code, error_message, None)`` is returned instead.
    """
    import base64
    import json
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen

    g = ConjunctiveGraph("BerkeleyDB")
    # Folder name matches the one given in the docstring ('gsq_vocabs').
    g.open("gsq_vocabs", create=True)

    try:
        # The listing is taken from a specific tree SHA of the repository; the
        # moving branch would be ".../vocabularies/git/trees/master" instead.
        gsq_vocabs = "https://api.github.com/repos/geological-survey-of-queensland/vocabularies/git/trees/cd7244d39337c1f4ef164b1cf1ea1f540a7277db"
        try:
            res = urlopen(Request(gsq_vocabs, headers={"Accept": "application/json"}))
        except HTTPError as e:
            return e.code, str(e), None

        # Close the HTTP response as soon as its payload has been read.
        with res:
            data = res.read()
            encoding = res.info().get_content_charset("utf-8")

        j = json.loads(data.decode(encoding))
        for v in j["tree"]:
            # process the element in GitHub result if it's a Turtle file
            if v["path"].endswith(".ttl"):
                # for each file, call it by URL, decode it and parse it into the graph
                with urlopen(v["url"]) as r:
                    content = json.loads(r.read().decode())["content"]
                g.parse(data=base64.b64decode(content).decode(), format="turtle")
                print(len(g))

        print("loading complete")
    finally:
        # close when done, otherwise BerkeleyDB will leak lock entries.
        g.close()
# Script entry point: the examples are only run when this file is executed
# directly and BerkeleyDB is available (``has_bsddb`` is truthy).
if __name__ == "__main__" and has_bsddb:
    example_1()
    example_2()