# Debug-logging state. ``debug`` is switched on by the command-line entry
# point; ``nesting`` is the current call depth maintained by ``nest_log``.
debug = False
nesting = 0

# One level of log indentation.
# NOTE(review): the width of the original whitespace literal could not be
# recovered from the flattened source; four spaces is assumed -- confirm.
_LOG_INDENT = '    '


def log(message):
    """Print *message* to stderr, indented to the current nesting depth.

    Does nothing unless the module-level ``debug`` flag is true. Every line
    of a multi-line message receives the same indentation.
    """
    if not debug:
        return
    indent = _LOG_INDENT * nesting
    print(indent + message.replace('\n', '\n' + indent), file=sys.stderr)


def nest_log(fn):
    """Decorator to increase the indentation of the log; useful for recursive debugging.

    The nesting depth is restored even when the wrapped function raises, so
    one failing call cannot leave every later log line over-indented.
    """
    @functools.wraps(fn)
    def wrapped(*args, **kwargs):
        global nesting
        log("Entering function {} ...".format(fn.__name__))
        nesting += 1
        try:
            return fn(*args, **kwargs)
        finally:
            # Runs on both normal return and exception.
            nesting -= 1
            log("Exited function {} ...".format(fn.__name__))
    return wrapped
# Reification vocabularies folded back onto the statement they describe:
# (source key, property key, target key, slot added to the matched object map).
_REIFICATIONS = (
    ("owl:annotatedSource", "owl:annotatedProperty", "owl:annotatedTarget", "annotations"),
    ("rdf:subject", "rdf:predicate", "rdf:object", "metadata"),
)


def _rows2subjects(thin):
    """Group thin rows into {subject: {predicate: [object maps]}} in one pass.

    Also returns {subject: set of blank-node ids} for every subject that has
    at least one object starting with "_:".
    """
    subjects = {}
    dependencies = {}
    for row in thin:
        subject_id = row["subject"]
        predicates = subjects.setdefault(subject_id, {})
        predicates.setdefault(row["predicate"], []).append(row2objectMap(row))
        target = row.get("object")
        if target and target.startswith("_:"):
            dependencies.setdefault(subject_id, set()).add(target)
    # Sort once per predicate so the output order is deterministic.
    for predicates in subjects.values():
        for objects in predicates.values():
            objects.sort(key=str)
    return subjects, dependencies


def _nest_blank_nodes(subjects, dependencies):
    """Replace blank-node references by the referenced predicate map, in place.

    Works from the leaves (subjects with no unresolved blank-node objects)
    towards the roots. Subjects that have been nested somewhere are removed
    from the top level of *subjects*.
    """
    while dependencies:
        leaves = set(subjects) - set(dependencies)
        dependencies = {}
        handled = set()
        for subject_id, predicates in subjects.items():
            for predicate in predicates:
                objects = []
                for obj in predicates[predicate]:
                    if not obj:
                        log("Bad object: <{} {} {}>".format(subject_id, predicate, obj))
                        continue
                    o = obj.get("object")
                    if o and isinstance(o, str) and o.startswith("_:"):
                        if o in leaves:
                            obj = {"object": subjects[o]}
                            handled.add(o)
                        else:
                            dependencies.setdefault(subject_id, set()).add(o)
                    objects.append(obj)
                objects.sort(key=str)
                predicates[predicate] = objects
        for subject_id in handled:
            del subjects[subject_id]
        if dependencies and not handled:
            # A pass that nests nothing cannot be followed by one that does:
            # the remaining references are dangling or cyclic. Leave them as
            # plain "_:..." strings instead of looping forever.
            log("LOOP!? Unresolvable blank node references: {}".format(pformat(dependencies)))
            break


def _fold_reification(subjects, subjects_copy, subject_id,
                      source_key, property_key, target_key, slot):
    """Attach the reification held by *subject_id* to the statement it describes.

    The remaining predicates of the reifying subject are merged into
    ``object_map[slot]`` of every object map of the described
    subject/predicate whose "object" or "value" equals the target.
    Returns True when at least one object map matched.
    """
    reified = subjects_copy[subject_id]
    subject = firstObject(reified, source_key)
    predicate = firstObject(reified, property_key)
    obj = firstObject(reified, target_key)
    log("Folding {} of {} into <{} {} {}>".format(slot, subject_id, subject, predicate, obj))

    # Drop the structural keys; what is left is the payload. ``pop`` rather
    # than ``del`` so an incomplete reification (e.g. without rdf:type) does
    # not fail on the bookkeeping itself.
    for key in (source_key, property_key, target_key, "rdf:type"):
        reified.pop(key, None)

    if not subjects_copy.get(subject):
        subjects_copy[subject] = deepcopy(subjects[subject])
    if not subjects_copy[subject].get(predicate):
        subjects_copy[subject][predicate] = deepcopy(subjects[subject][predicate])

    matched = False
    merged = []
    for o in subjects_copy[subject][predicate]:
        o = deepcopy(o)
        if o.get("object") == obj or o.get("value") == obj:
            bucket = o.setdefault(slot, {})
            for key, val in subjects_copy[subject_id].items():
                bucket.setdefault(key, [])
                bucket[key] += val
            matched = True
        merged.append(o)
    subjects_copy[subject][predicate] = merged
    log("This is the result (subject: {}, predicate: {}):\n{}".format(
        subject, predicate, pformat(merged)))
    return matched


@nest_log
def thin2subjects(thin):
    """Convert a list of thin rows to a nested subjects map.

    From
        [{"subject": "ex:s", "predicate": "ex:p", "object": "ex:o"}]
    to
        {"ex:s": {"ex:p": [{"object": "ex:o"}]}}

    Three steps:
    1. rows are grouped by subject and predicate;
    2. objects that are blank-node ids ("_:...") are replaced by the predicate
       map of that blank node, which is then removed from the top level;
    3. OWL axiom annotations and RDF reifications are folded onto the object
       map they describe, under the keys "annotations" and "metadata"; a
       reifying subject that matched is removed from the result.

    Raises whatever ``row2objectMap`` raises for an invalid row, and KeyError
    when a reification points at a subject or predicate that is not present.
    """
    subjects, dependencies = _rows2subjects(thin)
    _nest_blank_nodes(subjects, dependencies)

    remove = set()
    subjects_copy = {}
    for subject_id in sorted(subjects):
        # An earlier reification may already have created (and modified) the
        # copy of this subject; keep that version.
        if not subjects_copy.get(subject_id):
            subjects_copy[subject_id] = deepcopy(subjects[subject_id])

        for source_key, property_key, target_key, slot in _REIFICATIONS:
            if subjects_copy[subject_id].get(source_key):
                if _fold_reification(subjects, subjects_copy, subject_id,
                                     source_key, property_key, target_key, slot):
                    remove.add(subject_id)

    for subject_id in remove:
        del subjects_copy[subject_id]

    return subjects_copy
@nest_log
def create_node(content):
    """Build the rdflib node that corresponds to *content*.

    - a URIRef is returned unchanged;
    - a string starting with "_:" becomes a BNode with that string as its id;
    - a string starting with "<" becomes a URIRef with the angle brackets stripped;
    - any other string becomes a URIRef when ``deprefix`` can expand it,
      otherwise a plain Literal;
    - a dict with "value" and "language" becomes a language-tagged Literal;
    - a dict with "value" and "datatype" becomes a typed Literal (the
      datatype is expanded with ``deprefix`` when possible);
    - anything else is logged and turned into a Literal of its string form.
    """
    if isinstance(content, URIRef):
        return content

    if isinstance(content, str):
        if content.startswith('_:'):
            return BNode(content)
        if content.startswith('<'):
            return URIRef(content.strip('<>'))
        expanded = deprefix(content)
        if expanded:
            return URIRef(expanded)
        return Literal(content)

    if isinstance(content, dict) and 'value' in content:
        # The language tag takes precedence over the datatype.
        if 'language' in content:
            return Literal(content['value'], lang=content['language'])
        if 'datatype' in content:
            datatype_iri = deprefix(content['datatype']) or content['datatype']
            return Literal(content['value'], datatype=URIRef(datatype_iri))

    log("WARNING: Could not create a node corresponding to content. Defaulting to Literal")
    return Literal(format(content))
if decomp_type == 'annotations' else 'rdf:Statement' + annodata['rdf:type'] = [{'object': object_type}] + for key in thick_row[decomp_type]: + annodata[key] = thick_row[decomp_type][key] + return annodata + + @nest_log + def obj2triples(thick_row): + global b_id + + target = thick_row['object'] + triples = [] + if isinstance(target, list): + for t in target: + triples += thick2triples(t['subject'], t['predicate'], t) + # This is extremely hacky but it should work because of the order in which the ids + # are generated here. See also the similar comment below. In that case ids are generated + # in ascending order. + next_id = b_id - 1 + triples.append({'subject': create_node(_subject), + 'predicate': create_node(_predicate), + 'object': create_node(f"_:myb{next_id}")}) + elif not isinstance(target, str): + # This is a hacky way of doing this, but the logic is right. We need to save + # the b_id here because predicateMap2Triples is a recursive function and it will + # increment the b_id every time it is called. What we need here is just whatever the + # next id will be. 
@nest_log
def thicks2triples(thick_rows):
    """Convert a list of thick rows to a flat list of triple dicts.

    A row whose "object" is a string starting with "{" holds a JSON-encoded
    nested predicate map; it is decoded before the row is handed to
    ``thick2triples``. The decoding is done on a shallow copy, so the
    caller's rows are left untouched and can still be passed to functions
    that expect the JSON string form.
    """
    triples = []
    for row in thick_rows:
        o = row.get("object")
        if isinstance(o, str) and o.startswith("{"):
            row = {**row, "object": json.loads(o)}
        triples += thick2triples(row['subject'], row['predicate'], row)
    return triples
@nest_log
def rdf2list(predicates):
    """Convert a nested RDF list to a simple list of objects.

    From
        {'rdf:type': [{'object': 'rdf:List'}],
         'rdf:first': [{'value': 'A'}],
         'rdf:rest': [{
             'object': {
                 'rdf:type': [{'object': 'rdf:List'}],
                 'rdf:first': [{'value': 'B'}],
                 'rdf:rest': [{'object': 'rdf:nil'}]}}]}
    to
        [{"value": "A"}, {"value": "B"}]

    Only the first entry of "rdf:first" and of "rdf:rest" is considered.
    """
    items = []
    if "rdf:first" in predicates:
        items.append(predicates["rdf:first"][0])
    if "rdf:rest" not in predicates:
        return items

    rest = predicates["rdf:rest"][0]
    # The list ends at a missing/empty rest entry or at rdf:nil.
    if not rest or not rest.get("object") or rest["object"] == "rdf:nil":
        return items
    return items + rdf2list(rest["object"])


@nest_log
def rdf2ofs(predicates):
    """Given a predicate map, try to return an OWL Functional S-Expression.

    From
        {'rdf:type': [{'object': 'owl:Restriction'}],
         'owl:onProperty': [{'object': 'ex:part-of'}],
         'owl:someValuesFrom': [{'object': 'ex:bar'}]}
    to
        ["ObjectSomeValuesFrom", "ex:part-of", "ex:bar"]

    An rdf:List is returned as ["RDFList", item, ...]. Any other rdf:type
    (including a missing one) raises an Exception.
    """
    rdfType = firstObject(predicates, "rdf:type")

    if rdfType == "owl:Restriction":
        on_property = firstObject(predicates, "owl:onProperty")
        filler = firstObject(predicates, "owl:someValuesFrom")
        return ["ObjectSomeValuesFrom", on_property, filler]

    if rdfType == "rdf:List":
        return ["RDFList"] + rdf2list(predicates)

    # TODO: handle all the OFN types (See: https://www.w3.org/TR/2012/REC-owl2-mapping-to-rdf-20121211/)
    raise Exception(f"Unhandled type '{rdfType}' for: {predicates}")
@nest_log
def po2rdfa(labels, predicate, obj):
    """Render one predicate/object pair as an HTML vector (nested list).

    *obj* is an object map ({"object": ...} or {"value": ...}); a bare string
    is treated as {"object": <string>}.

    - a string object becomes an "a" element linking to it (surrounding
      angle brackets removed), labelled via *labels* when a label is known;
    - a nested object is rendered through ``rdf2ofs``/``ofs2rdfa``; when
      that fails it falls back to a "span" holding the object's string form;
    - a literal value (including the empty string) becomes a "span" carrying
      the predicate as its "property".

    Raises Exception when *obj* has neither an object nor a value.
    """
    if isinstance(obj, str):
        obj = {"object": obj}

    target = obj.get("object")
    if target:
        if isinstance(target, str):
            if target.startswith("<"):
                target = target[1:-1]
            return [
                "a",
                {
                    "href": target,
                    "property": predicate,
                },
                labels.get(target, target),
            ]
        try:
            return ofs2rdfa(labels, rdf2ofs(target))
        except Exception:
            # Best effort: expression types that are not supported yet are
            # shown verbatim. ``Exception`` (not a bare except) so that
            # KeyboardInterrupt and SystemExit still propagate.
            return ["span", str(target)]

    if obj.get("value") is not None:
        return [
            "span",
            {"property": predicate},
            obj["value"],
        ]

    raise Exception(f"Unhandled object: {obj}")
if __name__ == "__main__":
    # Local import: only the command-line entry point needs it.
    import os

    p = ArgumentParser("prototype.py", description="First pass at thick triples prototype")
    p.add_argument("-f", "--filter", nargs="+", default=[],
                   help="filter only on the given stanzas (one or more, space-separated)")
    p.add_argument("-l", "--log", action='store_true')
    p.add_argument('THIN', help='The file, in TSV format, that contains thin triples to convert.')
    p.add_argument('ONTOLOGY', help='The ontology file to use for the round-trip test.')
    args = p.parse_args()
    debug = bool(args.log)
    thin_input = args.THIN
    expected_file = args.ONTOLOGY

    # Small sanity check of the list conversion; only visible with --log.
    rdfList = {'rdf:type': [{'object': 'rdf:List'}],
               'rdf:first': [{'value': 'A'}],
               'rdf:rest': [{'object': {'rdf:type': [{'object': 'rdf:List'}],
                                        'rdf:first': [{'value': 'B'}],
                                        'rdf:rest': [{'object': 'rdf:nil'}]}}]}
    log("List {}".format(rdf2ofs(rdfList)))

    print("Reading in thin rows ...", file=sys.stderr)
    # newline="" is what the csv module expects from the file object.
    with open(thin_input, newline="") as fh:
        thin = list(csv.DictReader(fh, delimiter="\t"))

    if args.filter:
        wanted = set(args.filter)
        pruned_thin = [row for row in thin if row['stanza'] in wanted]
        if pruned_thin:
            thin = pruned_thin
        else:
            # Nothing matched: warn and fall back to the unfiltered rows.
            print("WARNING No stanzas corresponding to {} in db".format(', '.join(args.filter)))

    ############################
    ####### Generate thick rows
    ############################
    print("Generating thick rows ...", file=sys.stderr)
    # All artefacts are written below build/; create it on a fresh checkout.
    os.makedirs("build", exist_ok=True)
    with open("build/prefixes.n3", "w") as fh:
        for prefix in prefixes:
            # N3/Turtle requires the namespace IRI in angle brackets.
            print("@prefix {}: <{}> .".format(prefix, prefixes[prefix].strip('<>')), file=fh)

    thin_by_stanza = {}
    for t in thin:
        thin_by_stanza.setdefault(t['stanza'], []).append(t)

    thick = []
    for stanza_rows in thin_by_stanza.values():
        subjects = thin2subjects(stanza_rows)
        thick += subjects2thick(subjects)

    #print(pformat(thick))
    #sys.exit(0)

    ############################
    # Round-trip: go from thick rows to thin triples, build a graph, and then compare to the
    # original.
    ############################
    print("Generating graph ...", file=sys.stderr)
    triples = thicks2triples(thick)
    with open("build/triples.json", "w") as fh:
        print(pformat(triples), file=fh)

    actual = Graph()
    for triple in triples:
        actual.add((triple['subject'], triple['predicate'], triple['object']))
    with open("build/triples.n3", "w") as fh:
        render_graph(actual, fh)

    expected = Graph()
    if expected_file.endswith(".ttl"):
        expected.parse(expected_file, format="ttl")
    else:
        expected.parse(expected_file)

    for dump_path, graph in (("build/expected.ttl", expected), ("build/actual.ttl", actual)):
        serialized = graph.serialize(format="n3")
        # Depending on the rdflib version, serialize() returns bytes or str.
        if isinstance(serialized, bytes):
            serialized = serialized.decode("utf-8")
        with open(dump_path, "w", encoding="utf-8") as fh:
            print(serialized, file=fh)

    print("Comparing graph to expected graph ...", file=sys.stderr)
    try:
        compare_graphs(actual, expected, True)
    except AssertionError:
        print("Graphs are not identical. Full dumps can be found in build/actual.ttl "
              "and build/expected.ttl")
    else:
        print("Graphs are identical")

    # Wait on this one for now ...
    #reasoned = thick2reasoned(thick)
    #ofs = reasoned[0]["super"]
    #labels = {
    #    "ex:part-of": "part of",
    #    "ex:bar": "Bar",
    #}
    #print("OFS {}".format(ofs))
    #print("OMN {}".format(ofs2omn(labels, ofs)))
    #rdfa = ofs2rdfa(labels, ofs)
    #print("RDFa {}".format(rdfa))
    #print("HTML {}".format(render(prefixes, rdfa)))
    #rdfa = subject2rdfa(labels, "ex:foo", subjects["ex:foo"])
    ##print("RDFa {}".format(rdfa))
    #print("HTML\n" + render(prefixes, rdfa))
def dump_ttl(graph, sort):
    """Print the Turtle serialization of *graph*, skipping empty lines.

    :param graph: object whose ``serialize(format="ttl")`` returns the
        serialization as bytes or as str.
    :param sort: when true, the lines are printed in sorted order.
    """
    lines = graph.serialize(format="ttl").splitlines()
    if sort:
        lines.sort()
    for line in lines:
        if line:
            if isinstance(line, bytes):
                # Decode as UTF-8 so non-ASCII content is printed as text,
                # not as a bytes repr.
                line = line.decode("utf-8", errors="replace")
            print(line)


def compare_graphs(actual, expected, show_diff=False, sort=False):
    """Raise AssertionError unless *actual* and *expected* are isomorphic.

    :param show_diff: when true, also print the triples found in only one of
        the two graphs.
    :param sort: passed on to ``dump_ttl`` for the diff output.
    :raises AssertionError: when the graphs differ. Raised explicitly so the
        check also holds when Python runs with ``-O``.
    """
    actual_iso = to_isomorphic(actual)
    expected_iso = to_isomorphic(expected)

    # Compare once; the outcome decides both the report and the failure.
    if actual_iso == expected_iso:
        return

    print("The actual and expected graphs differ")
    if show_diff:
        _, in_first, in_second = graph_diff(actual_iso, expected_iso)
        print("----- Contents of actual graph not in expected graph -----")
        dump_ttl(in_first, sort)
        print("----- Contents of expected graph not in actual graph -----")
        dump_ttl(in_second, sort)

    raise AssertionError("The actual and expected graphs differ")