# Source code for rdflib.tools.defined_namespace_creator

from __future__ import annotations

"""
This rdflib Python script creates a DefinedNamespace Python file from a given RDF file

It is a very simple script: it finds all things defined in the RDF file within a given
namespace:

    <thing> a ?x

    where ?x is anything and <thing> starts with the given namespace

Nicholas J. Car, Dec, 2021
"""
import argparse
import datetime
import sys
from pathlib import Path
from typing import TYPE_CHECKING, Iterable, List, Tuple

sys.path.append(str(Path(__file__).parent.absolute().parent.parent))

from rdflib.graph import Graph  # noqa: E402
from rdflib.namespace import DCTERMS, OWL, RDFS, SKOS  # noqa: E402
from rdflib.util import guess_format  # noqa: E402

if TYPE_CHECKING:
    from rdflib.query import ResultRow


def validate_namespace(namespace: str) -> None:
    """Check that *namespace* ends with a namespace separator.

    :param namespace: the namespace IRI supplied by the user
    :raises ValueError: if *namespace* does not end with ``/`` or ``#``
    """
    if namespace.endswith("/") or namespace.endswith("#"):
        return
    raise ValueError("The supplied namespace must end with '/' or '#'")
def validate_object_id(object_id: str) -> None:
    """Check that *object_id* is a non-empty, all-capitals string.

    Every character must individually satisfy ``str.isupper()``, so digits,
    underscores and other non-letter characters are rejected as well.

    :param object_id: the identifier supplied by the user, e.g. ``GEO``
    :raises ValueError: if *object_id* is empty or contains any character
        that is not an upper-case letter
    """
    # all() is vacuously true for "", so emptiness has to be checked
    # explicitly; an empty identifier is not an all-capitals string.
    if not object_id or not all(c.isupper() for c in object_id):
        raise ValueError("The supplied object_id must be an all-capitals string")
# This function is not used: it was originally written to get classes and to be used
# alongside a method to get properties, but then it was decided that a single function
# to get everything in the namespace, get_target_namespace_elements(), was both simpler
# and better covered all namespace elements, so that function is used instead.
#
# def get_classes(g, target_namespace):
#     namespaces = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}
#     q = """
#         SELECT DISTINCT ?x ?def
#         WHERE {
#             # anything that is an instance of owl:Class or rdfs:Class
#             # or any subclass of them
#             VALUES ?c { owl:Class rdfs:Class }
#             ?x rdfs:subClassOf*/a ?c .
#
#             # get any definitions, if they have one
#             OPTIONAL {
#                 ?x rdfs:comment|dcterms:description|skos:definition ?def
#             }
#
#             # only get results for the targeted namespace (supplied by user)
#             FILTER STRSTARTS(STR(?x), "xxx")
#         }
#         """.replace("xxx", target_namespace)
#     classes = []
#     for r in g.query(q, initNs=namespaces):
#         classes.append((str(r[0]), str(r[1])))
#
#     classes.sort(key=lambda tup: tup[1])
#
#     return classes
def get_target_namespace_elements(
    g: Graph, target_namespace: str
) -> Tuple[List[Tuple[str, str]], List[str]]:
    """Collect every typed subject in *g* whose IRI starts with *target_namespace*.

    A SPARQL query selects each subject that has an ``rdf:type`` and whose
    IRI string starts with the namespace, together with the concatenation of
    its ``dcterms:description`` / ``rdfs:comment`` / ``skos:definition``
    values, if any.

    :param g: the graph to query
    :param target_namespace: namespace IRI; substituted textually into the
        query, so it is not escaped
    :return: a 2-tuple of

        * ``(iri, definitions)`` string pairs sorted by IRI, and
        * one ``"    <local name>: URIRef  # <definitions>\\n"`` source line
          per pair, in the same order.
    """
    namespaces = {"dcterms": DCTERMS, "owl": OWL, "rdfs": RDFS, "skos": SKOS}
    # The line breaks in the query matter: SPARQL "#" comments run to the
    # end of the line.
    q = """
        SELECT ?s (GROUP_CONCAT(DISTINCT STR(?def)) AS ?defs)
        WHERE {
            # all things in the RDF data (anything RDF.type...)
            ?s a ?o .

            # get any definitions, if they have one
            OPTIONAL {
                ?s dcterms:description|rdfs:comment|skos:definition ?def
            }

            # only get results for the target namespace (supplied by user)
            FILTER STRSTARTS(STR(?s), "xxx")
        }
        GROUP BY ?s
        """.replace(
        "xxx", target_namespace
    )

    elements: List[Tuple[str, str]] = []
    for r in g.query(q, initNs=namespaces):
        if TYPE_CHECKING:
            assert isinstance(r, ResultRow)
        elements.append((str(r[0]), str(r[1])))

    elements.sort(key=lambda tup: tup[0])

    prefix_len = len(target_namespace)
    elements_strs: List[str] = []
    for iri, definition in elements:
        # Strip only the leading namespace: str.replace() would also delete
        # any later occurrence of the namespace text inside the IRI.
        local_name = iri[prefix_len:] if iri.startswith(target_namespace) else iri
        # Flatten every line terminator Python source recognises; a stray
        # "\r" would otherwise end the generated comment early.
        desc = definition.replace("\r\n", " ").replace("\r", " ").replace("\n", " ")
        elements_strs.append(f"    {local_name}: URIRef  # {desc}\n")

    return elements, elements_strs
def make_dn_file(
    output_file_name: Path,
    target_namespace: str,
    elements_strs: Iterable[str],
    object_id: str,
    fail: bool,
) -> None:
    """Write a Python module defining a ``DefinedNamespace`` subclass.

    The file consists of the rdflib imports, a class named *object_id* with
    a placeholder docstring (the ``..._EDIT_ME_!`` markers and the generation
    date), the ``_NS`` attribute, an optional ``_fail = True`` line and then
    *elements_strs* verbatim.

    :param output_file_name: path of the file to create or overwrite
    :param target_namespace: namespace IRI placed in ``Namespace("...")``
    :param elements_strs: already indented, newline-terminated attribute
        lines for the class body
    :param object_id: name of the generated class
    :param fail: when true, ``_fail = True`` is written into the class body
    """
    # Timezone-aware UTC timestamp; datetime.utcnow() is deprecated and
    # returns a naive value.
    generated_at = datetime.datetime.now(datetime.timezone.utc)
    header = f'''from rdflib.term import URIRef
from rdflib.namespace import DefinedNamespace, Namespace


class {object_id}(DefinedNamespace):
    """
    DESCRIPTION_EDIT_ME_!

    Generated from: SOURCE_RDF_FILE_EDIT_ME_!
    Date: {generated_at}
    """
'''
    # Python source files are UTF-8 by default, so write UTF-8 explicitly
    # instead of the platform's locale encoding; element descriptions may
    # contain non-ASCII text.
    with open(output_file_name, "w", encoding="utf-8") as f:
        f.write(header)
        f.write("\n")
        f.write(f'    _NS = Namespace("{target_namespace}")')
        f.write("\n\n")
        if fail:
            f.write("    _fail = True")
            f.write("\n\n")
        f.writelines(elements_strs)
if __name__ == "__main__":
    # Command-line entry point: parse the arguments, load the ontology,
    # validate the namespace and object ID, then write
    # "_<object_id>.py" into the current working directory.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "ontology_file",
        type=str,
        help="Path to the RDF ontology to extract a DefinedNamespace from.",
    )
    parser.add_argument(
        "target_namespace",
        type=str,
        help="The namespace within the ontology that you want to create a "
        "DefinedNamespace for.",
    )
    parser.add_argument(
        "object_id",
        type=str,
        help="The RDFlib object ID of the DefinedNamespace, e.g. GEO for GeoSPARQL.",
    )
    parser.add_argument(
        "-f",
        "--fail",
        dest="fail",
        action="store_true",
        help="Whether (true) or not (false) to mimic ClosedNamespace and fail on "
        "non-element use",
    )
    parser.add_argument("--no-fail", dest="fail", action="store_false")
    # Default the destination that is actually read below (args.fail);
    # the previous "feature" key only added an attribute nothing used.
    parser.set_defaults(fail=False)
    args = parser.parse_args()

    fmt = guess_format(args.ontology_file)
    if fmt is None:
        print("The format of the file you've supplied is unknown.")
        # sys.exit() is always available, whereas the exit() builtin is
        # only injected by the site module.
        sys.exit(1)
    g = Graph().parse(args.ontology_file, format=fmt)

    validate_namespace(args.target_namespace)

    validate_object_id(args.object_id)

    print(
        f"Creating DefinedNamespace file {args.object_id} "
        f"for {args.target_namespace}..."
    )
    print(f"Ontology with {len(g)} triples loaded...")

    print("Getting all namespace elements...")
    elements = get_target_namespace_elements(g, args.target_namespace)

    output_file_name = Path.cwd() / f"_{args.object_id}.py"
    print(f"Creating DefinedNamespace Python file {output_file_name}")
    # elements[1] holds the pre-formatted attribute lines for the class body.
    make_dn_file(
        output_file_name, args.target_namespace, elements[1], args.object_id, args.fail
    )