diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..e25672d
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,14 @@
+MAKEFLAGS += --warn-undefined-variables
+SHELL := bash
+.SHELLFLAGS := -eu -o pipefail -c
+.DEFAULT_GOAL := round-trip
+.DELETE_ON_ERROR:
+.SUFFIXES:
+
+#args = --log --filter 'obo:CLO_0000001'
+#args = --log --filter 'BFO:0000027'
+#args = --log --filter 'OBI:0100061'
+
+# round-trip: build/obi_core.tsv obi_core_no_trailing_ws.owl
+round-trip: tests/thin.tsv tests/resources/example.rdf
+ tests/prototype.py $(args) $^
diff --git a/tests/prefix.tsv b/tests/prefix.tsv
new file mode 100644
index 0000000..cadcabd
--- /dev/null
+++ b/tests/prefix.tsv
@@ -0,0 +1,6 @@
+prefix base
+rdf http://www.w3.org/1999/02/22-rdf-syntax-ns#
+rdfs http://www.w3.org/2000/01/rdf-schema#
+xsd http://www.w3.org/2001/XMLSchema#
+owl http://www.w3.org/2002/07/owl#
+ex http://example.com/
diff --git a/tests/prototype.py b/tests/prototype.py
new file mode 100755
index 0000000..7656267
--- /dev/null
+++ b/tests/prototype.py
@@ -0,0 +1,747 @@
+#!/usr/bin/env python3
+
+import csv
+import json
+import re
+import sqlite3
+import sys
+
+from argparse import ArgumentParser
+from collections import OrderedDict
+from copy import deepcopy
+import functools
+from gizmos.hiccup import render
+from pprint import pformat
+from rdflib import Graph, BNode, URIRef, Literal
+
+from util import compare_graphs
+
+thin_input = None
+expected_owl = None
+
+# Create an OrderedDict of prefixes, sorted in descending order by the length
+# of the prefix's long form:
+prefixes = []
+with open("tests/resources/prefix.tsv") as fh:
+ rows = csv.DictReader(fh, delimiter="\t")
+ for row in rows:
+ if row.get("prefix"):
+ prefixes.append((row["prefix"], row["base"]))
+prefixes.sort(key=lambda x: len(x[1]), reverse=True)
+prefixes = OrderedDict(prefixes)
+
+debug = False
+nesting = 0
+def log(message):
+ if debug:
+ global nesting
+ message = message.replace('\n', '\n' + ' '*nesting)
+ print(' '*nesting, end='', file=sys.stderr)
+ print(message, file=sys.stderr)
+
+def nest_log(fn):
+ """Decorator to increase the indentation of the log; useful for recursive debugging."""
+ @functools.wraps(fn)
+ def wrapped(*args, **kwargs):
+ global nesting
+ log("Entering function {} ...".format(fn.__name__))
+ nesting += 1
+ ret = fn(*args, **kwargs)
+ nesting -= 1
+ log("Exited function {} ...".format(fn.__name__))
+ return ret
+ return wrapped
+
+
+# def dict_factory(cursor, row):
+# d = {}
+# for idx, col in enumerate(cursor.description):
+# d[col[0]] = row[idx]
+# return d
+# con = sqlite3.connect('obi_core.db')
+# con.row_factory = dict_factory
+# cur = con.cursor()
+# thin = []
+# for row in cur.execute('SELECT * FROM statements'):
+# thin.append(row)
+
+@nest_log
+def renderSubjects(subjects):
+ """Print a nested subject dict as indented lines.
+ From
+ {"ex:s": {"ex:p": [{"object": "ex:o"}]}}
+ to
+ ex:s
+ ex:p
+ {"object": "ex:o"}
+ """
+ for subject_id in sorted(list(subjects.keys())):
+ print(subject_id)
+ for predicate in sorted(list(subjects[subject_id].keys())):
+ print(" ", predicate)
+ for obj in subjects[subject_id][predicate]:
+ print(" ", obj)
+
+@nest_log
+def row2objectMap(row):
+ """Convert a row dict to an object map.
+ From
+ {"subject": "ex:s", "predicate": "ex:p", "object": "ex:o"}
+ to
+ {"object": "ex:o"}
+ {"value": "Foo"}
+ {"value": "Foo", "language": "en"}
+ {"value": "0.123", "datatype": "xsd:float"}
+ """
+ if row.get("object"):
+ return {"object": row["object"]}
+ elif row.get("value") is not None:
+ if row.get("datatype"):
+ return {"value": row["value"], "datatype": row["datatype"]}
+ elif row.get("language"):
+ return {"value": row["value"], "language": row["language"]}
+ elif row["value"]:
+ return {"value": row["value"]}
+
+ log("Invalid RDF row {}".format(row))
+ raise Exception("Invalid RDF row")
+
+@nest_log
+def thin2subjects(thin):
+ """Convert a list of thin rows to a nested subjects map:
+ From
+ [{"subject": "ex:s", "predicate": "ex:p", "object": "ex:o"}]
+ to
+ {"ex:s": {"ex:p": [{"object": "ex:o"}]}}
+ """
+ dependencies = {}
+ subject_ids = set(x["subject"] for x in thin)
+ subjects = {}
+
+ # Convert rows to a subject dict.
+ for subject_id in subject_ids:
+ predicates = {}
+ for row in thin:
+ if row["subject"] != subject_id:
+ continue
+ predicate = row["predicate"]
+ if predicate not in predicates:
+ predicates[predicate] = []
+ objects = predicates[predicate]
+ objects.append(row2objectMap(row))
+ objects.sort(key=lambda k: str(k))
+ predicates[predicate] = objects
+ if row.get("object") and row["object"].startswith("_:"):
+ if not subject_id in dependencies:
+ dependencies[subject_id] = set()
+ dependencies[subject_id].add(row["object"])
+ subjects[subject_id] = predicates
+
+ # Work from leaves to root, nesting the blank structures.
+ last_leaves = 0
+ while dependencies:
+ leaves = set(subjects.keys()) - set(dependencies.keys())
+ if len(leaves) == last_leaves:
+ # This is not necessarily a problem, so we comment out the `break` statement here, but
+ # we emit a warning anyway.
+ log("LOOP!?")
+ # break
+ last_leaves = len(leaves)
+ dependencies = {}
+ handled = set()
+ for subject_id, predicates in subjects.items():
+ for predicate in predicates.keys():
+ objects = []
+ for obj in predicates[predicate]:
+ if not obj:
+ log("Bad object: <{} {} {}>".format(subject_id, predicate, obj))
+ continue
+ o = obj.get("object")
+ if o and isinstance(o, str) and o.startswith("_:"):
+ if o in leaves:
+ obj = {"object": subjects[o]}
+ handled.add(o)
+ else:
+ if not subject_id in dependencies:
+ dependencies[subject_id] = set()
+ dependencies[subject_id].add(o)
+ objects.append(obj)
+ objects.sort(key=lambda k: str(k))
+ predicates[predicate] = objects
+ for subject_id in handled:
+ del subjects[subject_id]
+
+ remove = set()
+ subjects_copy = {}
+ for subject_id in sorted(subjects.keys()):
+ if not subjects_copy.get(subject_id):
+ subjects_copy[subject_id] = deepcopy(subjects[subject_id])
+
+ if subjects_copy[subject_id].get("owl:annotatedSource"):
+ log("Annotating subject: {}".format(subject_id))
+ log("This is what we've got to work with:\n{}".format(pformat(subjects_copy[subject_id])))
+ subject = firstObject(subjects_copy[subject_id], "owl:annotatedSource")
+ predicate = firstObject(subjects_copy[subject_id], "owl:annotatedProperty")
+ obj = firstObject(subjects_copy[subject_id], "owl:annotatedTarget")
+
+ del subjects_copy[subject_id]["owl:annotatedSource"]
+ del subjects_copy[subject_id]["owl:annotatedProperty"]
+ del subjects_copy[subject_id]["owl:annotatedTarget"]
+ del subjects_copy[subject_id]["rdf:type"]
+
+ if not subjects_copy.get(subject):
+ subjects_copy[subject] = deepcopy(subjects[subject])
+ if not subjects_copy[subject].get(predicate):
+ subjects_copy[subject][predicate] = deepcopy(subjects[subject][predicate])
+
+ objs = subjects_copy[subject][predicate]
+ objs_copy = []
+ for o in objs:
+ o = deepcopy(o)
+ if o.get("object") == obj or o.get("value") == obj:
+ if 'annotations' not in o:
+ o['annotations'] = {}
+ for key, val in subjects_copy[subject_id].items():
+ if key not in o['annotations']:
+ o['annotations'][key] = []
+ o['annotations'][key] += val
+ remove.add(subject_id)
+ objs_copy.append(o)
+ subjects_copy[subject][predicate] = objs_copy
+ log("This is the result (subject: {}, predicate: {}):\n{}".format(
+ subject, predicate, pformat(subjects_copy[subject][predicate])))
+
+ if subjects_copy[subject_id].get("rdf:subject"):
+ subject = firstObject(subjects_copy[subject_id], "rdf:subject")
+ predicate = firstObject(subjects_copy[subject_id], "rdf:predicate")
+ obj = firstObject(subjects_copy[subject_id], "rdf:object")
+
+ del subjects_copy[subject_id]["rdf:subject"]
+ del subjects_copy[subject_id]["rdf:predicate"]
+ del subjects_copy[subject_id]["rdf:object"]
+ del subjects_copy[subject_id]["rdf:type"]
+
+ if not subjects_copy.get(subject):
+ subjects_copy[subject] = deepcopy(subjects[subject])
+ if not subjects_copy[subject].get(predicate):
+ subjects_copy[subject][predicate] = deepcopy(subjects[subject][predicate])
+
+ objs = subjects_copy[subject][predicate]
+ objs_copy = []
+ for o in objs:
+ o = deepcopy(o)
+ if o.get("object") == obj or o.get("value") == obj:
+ if 'metadata' not in o:
+ o['metadata'] = {}
+ for key, val in subjects_copy[subject_id].items():
+ if key not in o['metadata']:
+ o['metadata'][key] = []
+ o['metadata'][key] += val
+ remove.add(subject_id)
+ objs_copy.append(o)
+ subjects_copy[subject][predicate] = objs_copy
+
+ for t in remove:
+ del subjects_copy[t]
+
+ return subjects_copy
+
+
+@nest_log
+def subjects2thick(subjects):
+ """Convert a nested subjects map to thick rows.
+ From
+ {"ex:s": {"ex:p": [{"object": {"ex:a": [{"value": "A"}]}}]}}
+ to
+ {"subject": "ex:s", "predicate": "ex:p", "object": "{\"ex:a\":[{\"value\": \"A\"}]}"}
+ """
+ rows = []
+ for subject_id in sorted(list(subjects.keys())):
+ for predicate in sorted(list(subjects[subject_id].keys())):
+ for obj in subjects[subject_id][predicate]:
+ result = {
+ "subject": subject_id,
+ "predicate": predicate,
+ **obj
+ }
+ if result.get("object") and not isinstance(result["object"], str):
+ result["object"] = json.dumps(result["object"])
+ rows.append(result)
+ return rows
+
+@nest_log
+def thick2subjects(thick):
+ pass
+
+
+### thick to Turtle
+@nest_log
+def shorten(content):
+ if isinstance(content, URIRef):
+        m = re.compile(r"(https?:\S+(#|\/))(.*)").match(content)
+ if m:
+ for key in prefixes:
+ if m[1] == prefixes[key]:
+ return "{}:{}".format(key, m[3])
+ if content.startswith("http"):
+ content = "<{}>".format(content)
+ return content
+
+@nest_log
+def render_graph(graph, fh=sys.stdout):
+ ttls = sorted([(s, p, o) for s, p, o in graph])
+ for subj, pred, obj in ttls:
+ print("{} {} ".format(shorten(subj), shorten(pred)), end="", file=fh)
+ if isinstance(obj, Literal) and obj.datatype:
+ print('"{}"^^{} '.format(obj.value, shorten(obj.datatype)), end="", file=fh)
+ elif isinstance(obj, Literal) and obj.language:
+ print('"{}"@{} '.format(obj.value, obj.language), end="", file=fh)
+ else:
+ print("{} ".format(shorten(obj)), end="", file=fh)
+ print(".", file=fh)
+
+@nest_log
+def deprefix(content):
+ m = re.compile(r"([\w\-]+):(.*)").match(content)
+ if m and prefixes.get(m[1]):
+ return "{}{}".format(prefixes[m[1]], m[2])
+
+@nest_log
+def create_node(content):
+ if isinstance(content, URIRef):
+ return content
+ elif isinstance(content, str) and content.startswith('_:'):
+ return BNode(content)
+ elif isinstance(content, str) and (content.startswith('<')):
+ return URIRef(content.strip('<>'))
+ elif isinstance(content, str):
+ deprefixed_content = deprefix(content)
+ return URIRef(deprefixed_content) if deprefixed_content else Literal(content)
+ else:
+ if isinstance(content, dict) and 'value' in content and 'language' in content:
+ return Literal(content['value'], lang=content['language'])
+ elif isinstance(content, dict) and 'value' in content and 'datatype' in content:
+ deprefixed_datatype = deprefix(content['datatype'])
+ datatype = URIRef(content['datatype']) if not deprefixed_datatype \
+ else URIRef(deprefixed_datatype)
+ return(Literal(content['value'], datatype=datatype))
+ else:
+ log("WARNING: Could not create a node corresponding to content. Defaulting to Literal")
+ return Literal(format(content))
+
+b_id = 0
+@nest_log
+def thick2triples(_subject, _predicate, _thick_row):
+ if 'object' not in _thick_row and 'value' not in _thick_row:
+ raise Exception(f"Don't know how to handle thick_row without value or object: {_thick_row}")
+
+ @nest_log
+ def predicateMap2triples(pred_map):
+ global b_id
+ b_id += 1
+
+ bnode = f"_:myb{b_id}"
+ triples = []
+ for predicate, objects in pred_map.items():
+ for obj in objects:
+ triples += thick2triples(bnode, predicate, obj)
+ return triples
+
+ @nest_log
+ def decompress(thick_row, target, target_type, decomp_type):
+ spo_mappings = {
+ 'annotations': {
+ 'subject': 'owl:annotatedSource',
+ 'predicate': 'owl:annotatedProperty',
+ 'object': 'owl:annotatedTarget'
+ },
+ 'metadata': {
+ 'subject': 'rdf:subject',
+ 'predicate': 'rdf:predicate',
+ 'object': 'rdf:object'
+ }
+ }
+ annodata_subj = spo_mappings[decomp_type]['subject']
+ annodata_pred = spo_mappings[decomp_type]['predicate']
+ annodata_obj = spo_mappings[decomp_type]['object']
+
+ if isinstance(target, str) or 'value' in target:
+ annodata = {annodata_obj: [{target_type: target}]}
+ else:
+ annodata = {annodata_obj: [{target_type: predicateMap2triples(target)}]}
+
+ annodata[annodata_subj] = [{'object': thick_row['subject']}]
+ annodata[annodata_pred] = [{'object': thick_row['predicate']}]
+ object_type = 'owl:Axiom' if decomp_type == 'annotations' else 'rdf:Statement'
+ annodata['rdf:type'] = [{'object': object_type}]
+ for key in thick_row[decomp_type]:
+ annodata[key] = thick_row[decomp_type][key]
+ return annodata
+
+ @nest_log
+ def obj2triples(thick_row):
+ global b_id
+
+ target = thick_row['object']
+ triples = []
+ if isinstance(target, list):
+ for t in target:
+ triples += thick2triples(t['subject'], t['predicate'], t)
+ # This is extremely hacky but it should work because of the order in which the ids
+ # are generated here. See also the similar comment below. In that case ids are generated
+ # in ascending order.
+ next_id = b_id - 1
+ triples.append({'subject': create_node(_subject),
+ 'predicate': create_node(_predicate),
+ 'object': create_node(f"_:myb{next_id}")})
+ elif not isinstance(target, str):
+ # This is a hacky way of doing this, but the logic is right. We need to save
+            # the b_id here because predicateMap2triples is a recursive function and it will
+ # increment the b_id every time it is called. What we need here is just whatever the
+ # next id will be.
+ next_id = b_id + 1
+ triples += predicateMap2triples(target)
+ triples.append({'subject': create_node(_subject),
+ 'predicate': create_node(_predicate),
+ 'object': create_node(f"_:myb{next_id}")})
+ else:
+ triples.append({'subject': create_node(_subject),
+ 'predicate': create_node(_predicate),
+ 'object': create_node(target)})
+
+ if 'annotations' in thick_row:
+ triples += predicateMap2triples(decompress(thick_row, target, 'object', 'annotations'))
+
+ if 'metadata' in thick_row:
+ triples += predicateMap2triples(decompress(thick_row, target, 'object', 'metadata'))
+
+ return triples
+
+ @nest_log
+ def val2triples(thick_row):
+ target = value = thick_row['value']
+ if 'datatype' in thick_row:
+ target = {'value': value, 'datatype': thick_row['datatype']}
+ elif 'language' in thick_row:
+ target = {'value': value, 'language': thick_row['language']}
+
+ triples = [{'subject': create_node(_subject),
+ 'predicate': create_node(_predicate),
+ 'object': create_node(target)}]
+
+ if 'annotations' in thick_row:
+ triples += predicateMap2triples(decompress(thick_row, target, 'value', 'annotations'))
+
+ if 'metadata' in thick_row:
+ triples += predicateMap2triples(decompress(thick_row, target, 'value', 'metadata'))
+
+ return triples
+
+ if "object" in _thick_row:
+ return obj2triples(_thick_row)
+ elif 'value' in _thick_row:
+ return val2triples(_thick_row)
+
+@nest_log
+def thicks2triples(thick_rows):
+ triples = []
+ for row in thick_rows:
+ if "object" in row:
+ o = row["object"]
+ if isinstance(o, str) and o.startswith("{"):
+ row["object"] = json.loads(o)
+ triples += thick2triples(row['subject'], row['predicate'], row)
+ return triples
+
+owlTypes = ["owl:Restriction"]
+
+@nest_log
+def firstObject(predicates, predicate):
+    """Given a predicate map, return the first 'object'."""
+ if predicates.get(predicate):
+ for obj in predicates[predicate]:
+ if obj.get("object"):
+ return obj["object"]
+ elif obj.get('value'):
+ return obj["value"]
+
+ log("No object found")
+
+@nest_log
+def rdf2list(predicates):
+ """Convert a nested RDF list to a simple list of objects.
+ From
+ {'rdf:type': [{'object': 'rdf:List'}],
+ 'rdf:first': [{'value': 'A'}],
+ 'rdf:rest': [{
+ 'object': {
+ 'rdf:type': [{'object': 'rdf:List'}],
+ 'rdf:first': [{'value': 'B'}],
+ 'rdf:rest': [{'object': 'rdf:nil'}]}}]}}
+ to
+ [{"value": "A"}, {"value": "B"}]
+ """
+ result = []
+ if "rdf:first" in predicates:
+ result.append(predicates["rdf:first"][0])
+ if "rdf:rest" in predicates:
+ o = predicates["rdf:rest"][0]
+ if not o:
+ return result
+ if not o.get("object"):
+ return result
+ if o["object"] == "rdf:nil":
+ return result
+ return result + rdf2list(o["object"])
+ return result
+
+@nest_log
+def rdf2ofs(predicates):
+ """Given a predicate map, try to return an OWL Functional S-Expression.
+ From
+ {'rdf:type': [{'object': 'owl:Restriction'}],
+ 'owl:onProperty': [{'object': 'ex:part-of'}],
+ 'owl:someValuesFrom': [{'object': 'ex:bar'}]}
+ to
+ ["ObjectSomeValuesFrom", "ex:part-of", "ex:bar"]
+ """
+ rdfType = firstObject(predicates, "rdf:type")
+ result = None
+ if rdfType == "owl:Restriction":
+ onProperty = firstObject(predicates, "owl:onProperty")
+ someValuesFrom = firstObject(predicates, "owl:someValuesFrom")
+ result = ["ObjectSomeValuesFrom", onProperty, someValuesFrom]
+ elif rdfType == "rdf:List":
+ result = ["RDFList"] + rdf2list(predicates)
+ # TODO: handle all the OFN types (See: https://www.w3.org/TR/2012/REC-owl2-mapping-to-rdf-20121211/)
+ else:
+ raise Exception(f"Unhandled type '{rdfType}' for: {predicates}")
+ return result
+
+@nest_log
+def thick2reasoned(thick):
+ """Convert logical thick rows to reasoned rows.
+ From
+ [{"subject": "ex:a", "predicate": "owl:equivalentClass", "object": "ex:b"}]
+ to
+ [{"super": "ex:a", "sub": "ex:b"}
+ {"super": "ex:b", "sub": "ex:a"}]
+ """
+ reasoned = []
+ for row in thick:
+ owl = None
+ if row["predicate"] in ["rdfs:subClassOf", "owl:equivalentClass"]:
+ if row.get("object") and isinstance(row["object"], str):
+ if row["object"].startswith("{"):
+ o = rdf2ofs(json.loads(row["object"]))
+ else:
+ o = row["object"]
+ result = {
+ "super": o,
+ "sub": row["subject"],
+ }
+ reasoned.append(result)
+ if row["predicate"] in ["owl:equivalentClass"]:
+ result = {
+ "super": row["subject"],
+ "sub": o,
+ }
+ reasoned.append(result)
+ return reasoned
+
+@nest_log
+def quote(label):
+ if re.search(r'\W', label):
+ return f"'{label}'"
+ return label
+
+@nest_log
+def ofs2omn(labels, ofs):
+ """Convert OFS to Manchester (OMN) with labels.
+ From
+ ["ObjectSomeValuesFrom", "ex:part-of", "ex:bar"]
+ to
+ 'part of' some Bar
+ """
+ first = ofs[0]
+ if first == "ObjectSomeValuesFrom":
+ onProperty = quote(labels.get(ofs[1], ofs[1]))
+ someValuesFrom = quote(labels.get(ofs[2], ofs[2]))
+ return f"{onProperty} some {someValuesFrom}"
+ # TODO: handle all the OFN types
+ else:
+ raise Exception(f"Unhandled expression type '{first}' for: {ofs}")
+
+@nest_log
+def po2rdfa(labels, predicate, obj):
+ if isinstance(obj, str):
+ obj = {"object": obj}
+ if obj.get("object"):
+ o = obj["object"]
+ if isinstance(o, str):
+ if o.startswith("<"):
+ o = o[1:-1]
+ return [
+ "a",
+ {
+ "href": o,
+ "property": predicate,
+ },
+ labels.get(o, o),
+ ]
+ try:
+ return ofs2rdfa(labels, rdf2ofs(o))
+            except Exception:
+ return ["span", str(o)]
+ elif obj.get("value"):
+ return [
+ "span",
+ {"property": predicate},
+ obj["value"],
+ ]
+ else:
+ raise Exception(f"Unhandled object: {obj}")
+
+@nest_log
+def ofs2rdfa(labels, ofs):
+ """Convert an OFS list to an HTML vector."""
+ first = ofs[0]
+ if first == "ObjectSomeValuesFrom":
+ onProperty = po2rdfa(labels, "owl:onProperty", ofs[1])
+ someValuesFrom = po2rdfa(labels, "owl:someValuesFrom", ofs[2])
+ return ["span", onProperty, " some ", someValuesFrom]
+ elif first == "RDFList":
+ return ["span", "TODO " + str(ofs)]
+ # TODO: handle all the OFN types
+ else:
+ raise Exception(f"Unhandled expression type '{first}' for: {ofs}")
+
+@nest_log
+def rows2labels(rows):
+ """Given a list of rows, return a map from subject to rdfs:label value."""
+ labels = {}
+ for row in rows:
+ if row["predicate"] == "rdfs:label":
+ labels[row["subject"]] = row["value"]
+ return labels
+
+@nest_log
+def subject2rdfa(labels, subject_id, predicates):
+ """Convert a subject_id and predicate map to an HTML vector."""
+ html = ["ul"]
+ for predicate in sorted(list(predicates.keys())):
+ for obj in predicates[predicate]:
+ html.append(["li", po2rdfa(labels, predicate, obj)])
+ return ["li", subject_id, html]
+
+@nest_log
+def subjects2rdfa(labels, subjects):
+ """Convert a subject_id and subjects map to an HTML vector."""
+ html = ["ul"]
+ for subject_id in sorted(list(subjects.keys())):
+ html.append(subject2rdfa(labels, subject_id, subjects[subject_id]))
+ return html
+
+
+if __name__ == "__main__":
+ p = ArgumentParser("prototype.py", description="First pass at thick triples prototype")
+ p.add_argument("-f", "--filter", nargs="+", default=[],
+                   help="filter only on the given space-separated list of stanzas")
+ p.add_argument("-l", "--log", action='store_true')
+ p.add_argument('THIN', help='The file, in TSV format, that contains thin triples to convert.')
+ p.add_argument('ONTOLOGY', help='The ontology file to use for the round-trip test.')
+ args = p.parse_args()
+ debug = bool(args.log)
+ thin_input = args.THIN
+ expected_file = args.ONTOLOGY
+
+ rdfList = {'rdf:type': [{'object': 'rdf:List'}],
+ 'rdf:first': [{'value': 'A'}],
+ 'rdf:rest': [{'object': {'rdf:type': [{'object': 'rdf:List'}],
+ 'rdf:first': [{'value': 'B'}],
+ 'rdf:rest': [{'object': 'rdf:nil'}]}}]}
+ log("List {}".format(rdf2ofs(rdfList)))
+
+ print("Reading in thin rows ...", file=sys.stderr)
+ with open(thin_input) as fh:
+ thin = list(csv.DictReader(fh, delimiter="\t"))
+ if args.filter:
+ pruned_thin = [row for row in thin if row['stanza'] in args.filter]
+ else:
+ pruned_thin = []
+
+ if args.filter and not pruned_thin:
+ print("WARNING No stanzas corresponding to {} in db".format(', '.join(args.filter)))
+ thin = thin if not pruned_thin else pruned_thin
+
+ ############################
+ ####### Generate thick rows
+ ############################
+ print("Generating thick rows ...", file=sys.stderr)
+ with open("build/prefixes.n3", "w") as fh:
+ for prefix in prefixes:
+ print("@prefix {}: {} .".format(prefix, prefixes[prefix].strip('<>')), file=fh)
+
+ thin_by_stanza = {}
+ for t in thin:
+ if t['stanza'] not in thin_by_stanza:
+ thin_by_stanza[t['stanza']] = []
+ thin_by_stanza[t['stanza']].append(t)
+
+ thick = []
+ for (stanza, thin) in thin_by_stanza.items():
+ subjects = thin2subjects(thin)
+ thick += subjects2thick(subjects)
+
+ #print(pformat(thick))
+ #sys.exit(0)
+
+ ############################
+ # Round-trip: go from thick rows to thin triples, build a graph, and then compare to the
+ # original.
+ ############################
+ print("Generating graph ...", file=sys.stderr)
+ triples = thicks2triples(thick)
+ with open("build/triples.json", "w") as fh:
+ print(pformat(triples), file=fh)
+
+ actual = Graph()
+ [actual.add((triple['subject'], triple['predicate'], triple['object'])) for triple in triples]
+ with open("build/triples.n3", "w") as fh:
+ render_graph(actual, fh)
+
+ expected = Graph()
+ if expected_file.endswith(".ttl"):
+ expected.parse(expected_file, format="ttl")
+ else:
+ expected.parse(expected_file)
+
+ with open("build/expected.ttl", "w") as fh:
+ print(expected.serialize(format="n3").decode("utf-8"), file=fh)
+ with open("build/actual.ttl", "w") as fh:
+ print(actual.serialize(format="n3").decode("utf-8"), file=fh)
+
+ print("Comparing graph to expected graph ...", file=sys.stderr)
+ try:
+ compare_graphs(actual, expected, True)
+ except AssertionError as e:
+ print("Graphs are not identical. Full dumps can be found in build/actual.ttl "
+ "and build/expected.ttl")
+ else:
+ print("Graphs are identical")
+
+ # Wait on this one for now ...
+ #reasoned = thick2reasoned(thick)
+ #ofs = reasoned[0]["super"]
+ #labels = {
+ # "ex:part-of": "part of",
+ # "ex:bar": "Bar",
+ #}
+ #print("OFS {}".format(ofs))
+ #print("OMN {}".format(ofs2omn(labels, ofs)))
+ #rdfa = ofs2rdfa(labels, ofs)
+ #print("RDFa {}".format(rdfa))
+ #print("HTML {}".format(render(prefixes, rdfa)))
+ #rdfa = subject2rdfa(labels, "ex:foo", subjects["ex:foo"])
+ ##print("RDFa {}".format(rdfa))
+ #print("HTML\n" + render(prefixes, rdfa))
diff --git a/tests/resources/example.rdf b/tests/resources/example.rdf
new file mode 100644
index 0000000..aed65f0
--- /dev/null
+++ b/tests/resources/example.rdf
@@ -0,0 +1,66 @@
+
+
+
+
+
+ A
+
+
+ B
+
+
+
+
+
+
+ 123
+ Foo
+ Fou
+
+
+
+
+
+
+
+
+ OWL axiom annotation
+
+
+
+
+
+
+
+
+ RDF metadata
+
+
diff --git a/tests/resources/prefix.tsv b/tests/resources/prefix.tsv
index b6a7aea..0afc7dd 100644
--- a/tests/resources/prefix.tsv
+++ b/tests/resources/prefix.tsv
@@ -8,7 +8,7 @@ dce http://purl.org/dc/elements/1.1/
dct http://purl.org/dc/terms/
foaf http://xmlns.com/foaf/0.1/
protege http://protege.stanford.edu/plugins/owl/protege#
-ex http://example.com
+ex http://example.com/
BFO http://purl.obolibrary.org/obo/BFO_
CHEBI http://purl.obolibrary.org/obo/CHEBI_
CL http://purl.obolibrary.org/obo/CL_
diff --git a/tests/thick.tsv b/tests/thick.tsv
new file mode 100644
index 0000000..96bafa1
--- /dev/null
+++ b/tests/thick.tsv
@@ -0,0 +1,8 @@
+subject predicate object value datatype language annotations metadata
+ex:foo rdfs:label Foo
+ex:foo rdfs:label Fou fr
+ex:foo ex:size 123 xsd:int
+ex:foo ex:link "{""rdfs:comment"":[{""value"":""OWL axiom annotation"",""language"":""en""}]}" "{""rdfs:comment"":[{""value"":""RDF metadata"",""language"":""en""}]}"
+ex:foo rdf:type owl:Class
+ex:foo rdfs:subClassOf "{""rdf:type"":[{""object"":""owl:Restriction""}],""owl:onProperty"":[{""object"":""ex:part-of""}],""owl:someValuesFrom"":[{""object"":""ex:bar""}]}"
+ex:foo ex:items "[{""value"":""A""},{""value"":""B""}]"
diff --git a/tests/thin.tsv b/tests/thin.tsv
new file mode 100644
index 0000000..1396683
--- /dev/null
+++ b/tests/thin.tsv
@@ -0,0 +1,27 @@
+stanza subject predicate object value datatype language
+ex:foo ex:foo rdfs:label Foo
+ex:foo ex:foo rdfs:label Fou fr
+ex:foo ex:foo ex:size 123 xsd:int
+ex:foo ex:foo ex:link
+ex:foo ex:foo rdf:type owl:Class
+ex:foo ex:foo rdfs:subClassOf _:b1
+ex:foo _:b1 rdf:type owl:Restriction
+ex:foo _:b1 owl:onProperty ex:part-of
+ex:foo _:b1 owl:someValuesFrom ex:bar
+ex:foo ex:foo ex:items _:b2
+ex:foo _:b2 rdf:type rdf:List
+ex:foo _:b2 rdf:first A
+ex:foo _:b2 rdf:rest _:b3
+ex:foo _:b3 rdf:type rdf:List
+ex:foo _:b3 rdf:first B
+ex:foo _:b3 rdf:rest rdf:nil
+ex:foo _:b4 rdf:type owl:Axiom
+ex:foo _:b4 owl:annotatedSource ex:foo
+ex:foo _:b4 owl:annotatedProperty ex:link
+ex:foo _:b4 owl:annotatedTarget
+ex:foo _:b4 rdfs:comment OWL axiom annotation en
+ex:foo _:b5 rdf:type rdf:Statement
+ex:foo _:b5 rdf:subject ex:foo
+ex:foo _:b5 rdf:predicate ex:link
+ex:foo _:b5 rdf:object
+ex:foo _:b5 rdfs:comment RDF metadata en
diff --git a/tests/util.py b/tests/util.py
index 2433a7c..705b393 100644
--- a/tests/util.py
+++ b/tests/util.py
@@ -18,23 +18,29 @@
test_conn = {"host": POSTGRES_HOST, "database": "gizmos_test", "user": POSTGRES_USER, "password": POSTGRES_PW, "port": POSTGRES_PORT}
-def dump_ttl_sorted(graph):
- for line in sorted(graph.serialize(format="ttl").splitlines()):
+def dump_ttl(graph, sort):
+ lines = graph.serialize(format="ttl").splitlines()
+ if sort:
+ lines.sort()
+ for line in lines:
if line:
- print(line.decode("ascii"))
+ try:
+ print(line.decode("ascii"))
+ except UnicodeDecodeError:
+ print(line)
-
-def compare_graphs(actual, expected):
+def compare_graphs(actual, expected, show_diff=False, sort=False):
actual_iso = to_isomorphic(actual)
expected_iso = to_isomorphic(expected)
if actual_iso != expected_iso:
- _, in_first, in_second = graph_diff(actual_iso, expected_iso)
print("The actual and expected graphs differ")
- print("----- Contents of actual graph not in expected graph -----")
- dump_ttl_sorted(in_first)
- print("----- Contents of expected graph not in actual graph -----")
- dump_ttl_sorted(in_second)
+ if show_diff:
+ _, in_first, in_second = graph_diff(actual_iso, expected_iso)
+ print("----- Contents of actual graph not in expected graph -----")
+ dump_ttl(in_first, sort)
+ print("----- Contents of expected graph not in actual graph -----")
+ dump_ttl(in_second, sort)
assert actual_iso == expected_iso