diff --git a/deepsearch_glm/glm_utils.py b/deepsearch_glm/glm_utils.py index 1af1f8d9..e071ee29 100644 --- a/deepsearch_glm/glm_utils.py +++ b/deepsearch_glm/glm_utils.py @@ -43,8 +43,7 @@ def load_glm_config(idir:str): def load_glm(idir:str): config = load_glm_config(idir) - - #glm = andromeda_glm.glm_model() + glm = glm_model() glm.load(config) @@ -60,7 +59,7 @@ def create_glm_config_from_docs(odir:str, json_files:list[str], }, "save": { "root": odir, - "write-CSV": True, + "write-CSV": False, "write-JSON": False, "write-path-text": False } @@ -129,7 +128,6 @@ def create_glm_from_docs(odir:str, json_files:list[str], config = create_glm_config_from_docs(odir, json_files, nlp_models) - #glm = andromeda_glm.glm_model() glm = glm_model() glm.create(config) diff --git a/deepsearch_glm/nlp_apply_on_docs.py b/deepsearch_glm/nlp_apply_on_docs.py index 10cd3609..99d2739d 100644 --- a/deepsearch_glm/nlp_apply_on_docs.py +++ b/deepsearch_glm/nlp_apply_on_docs.py @@ -7,8 +7,9 @@ import pandas as pd -from utils.ds_utils import convert_pdffiles, to_legacy_document_format +from tabulate import tabulate +from utils.ds_utils import convert_pdffiles, to_legacy_document_format from deepsearch_glm.andromeda_nlp import nlp_model def parse_arguments(): @@ -100,6 +101,14 @@ def init_nlp_model(models:str, filters:list[str]=[]): return model +def show_texts(doc_j): + + data=[] + for item in doc_j["texts"]: + data.append([item["hash"], item["text-hash"], item["text"][0:48]]) + + print(tabulate(data, headers=["hash", "text-hash", "text"])) + def show_doc(doc_j): """ @@ -125,6 +134,9 @@ def show_doc(doc_j): print(json.dumps(doc_j["tables"][0], indent=2)) """ + if "texts" in doc_j: + show_texts(doc_j) + if "properties" in doc_j: props = pd.DataFrame(doc_j["properties"]["data"], columns=doc_j["properties"]["headers"]) diff --git a/deepsearch_glm/nlp_train_reference.py b/deepsearch_glm/nlp_train_reference.py index 3a5897df..228ac94e 100644 --- a/deepsearch_glm/nlp_train_reference.py +++ b/deepsearch_glm/nlp_train_reference.py @@ -6,6 +6,7 @@ import time import json import glob +import tqdm import argparse import random @@ -17,17 +18,13 @@ import pandas as pd import matplotlib.pyplot as plt -#import fasttext import textColor as tc -#import deepsearch as ds -#from tabulate import tabulate - -#import andromeda_nlp +from tabulate import tabulate from deepsearch_glm.andromeda_nlp import nlp_model from deepsearch_glm.utils.ds_utils import convert_pdffiles -from deepsearch_glm.nlp_utils import create_nlp_dir +from deepsearch_glm.nlp_utils import create_nlp_dir, init_nlp_model def parse_arguments(): @@ -40,208 +37,126 @@ def parse_arguments(): 1. end-to-end example on pdf documents: - poetry run python ./deepsearch_glm/nlp_train_reference.py -m all --pdf './data/documents/articles/*.pdf' - + poetry run python ./deepsearch_glm/nlp_train_semantic.py -m all --input-dir ' --output-dir ' """, formatter_class=argparse.RawTextHelpFormatter) - parser.add_argument('-m', '--mode', required=False, default="all", - help="parse: [convert,extract,annotate,classify,pure-classify,crf,pure-crf,all]") - - parser.add_argument('--pdf', required=True, - type=str, default=None, - help="filename(s) of pdf document") + parser.add_argument('-m', '--mode', required=True, default="all", + help="mode for training semantic model", + choices=["extract","annotate","train","all"]) - parser.add_argument('--json', required=False, + parser.add_argument('--input-dir', required=False, type=str, default=None, - help="filename(s) of json document") + help="input directory with documents") parser.add_argument('--output-dir', required=False, - type=str, default=create_nlp_dir(), - help="output root directory for trained models") - - """ - parser.add_argument('', '--source-directory', required=False, default="./data/documents/articles", - help="directory with pdfs") - parser.add_argument('-t', '--target-directory', required=False, default="./data/models/", - help="directory for target files") - """ + type=str, default="./reference-models", + help="output directory for trained models") + + parser.add_argument('--max-items', required=False, + type=int, default=-1, + help="number of references") args = parser.parse_args() - pdf = args.pdf - json = args.json + idir = args.input_dir + + if args.output_dir==None: + odir = create_nlp_dir() + + elif not os.path.exists(args.output_dir): + os.mkdir(args.output_dir) + odir = args.output_dir - if pdf==None and json==None: - exit(-1) - - if pdf!=None: - pdf_files=sorted(glob.glob(pdf)) else: - pdf_files=[] + odir = args.output_dir - if json!=None: - json_files=sorted(glob.glob(json)) - else: - json_files=[] - - if not os.path.exists(args.output_dir): - os.mkdir(args.output_dir) - - return args.mode, pdf_files, json_files, args.output_dir + return args.mode, args.input_dir, odir, args.max_items def shorten_text(text): ntext = text.replace("\n", "") return ntext.strip() - -def extract_references(filenames, sfile, rfile): - print(f"extract references for filenames: ", len(filenames)) - - config = { - "mode" : "apply", - "order" : True, - "models": "numval,link" - } - - #model = andromeda_nlp.nlp_model() - model = nlp_model() - model.initialise(config) - - MINLEN = 5 +def extract_references(filenames, ofile): - fws = open(sfile, "w") - fwr = open(rfile, "w") - - for filename in filenames: + nlp_model = init_nlp_model("semantic") - if filename.endswith("references.json"): - continue - - print(f"reading {filename}") + fw = open(ofile, "w") + + total=0 + for filename in tqdm.tqdm(filenames): + #print(f"reading {filename}") try: with open(filename, "r") as fr: - data = json.load(fr) + idoc = json.load(fr) except: continue - - with open(filename, "w") as fw: - fw.write(json.dumps(data, indent=2)) - is_ref=False - cnt_ref=0 + if random.random()<0.9: + training_sample = True + else: + training_sample = False - for item in data["main-text"]: + odoc = nlp_model.apply_on_doc(idoc) - if "text" in item: - text = item["text"].strip() - else: - continue + props = pd.DataFrame(odoc["properties"]["data"], + columns=odoc["properties"]["headers"]) - if "type" in item: - label = item["type"] - else: - continue + props_refs = props[props["label"]=="reference"] + #print(props_refs) + refs_hash = list(props_refs["subj_hash"]) - label = (label.split("-"))[0] + texts = pd.DataFrame.from_records(odoc["texts"]) + #print(texts) - content = text.lower().strip().replace(" ", "") - - if content.endswith("references"): - is_ref = True - elif is_ref and label=="subtitle": - is_ref = False - - if is_ref and (not content.endswith("references")) and len(text)>=MINLEN: - label = "reference" - elif re.match("^(\d+|\[\d+\])(.*)\((19|20)\d{2}\)\.?$", text): - label = "reference" - elif re.match("^(\[\d+\])(\s+[A-Z]\.)+.*", text): - label = "reference" - elif re.match("^(\[\])(.*)\((19|20)\d{2}\)\.?$", text): - label = "reference" - elif re.match("^(Table|Figure)(\s+\d+(\.\d+)?)(.*)", text): - label = "caption" - elif len(text.strip())0.95][["confidence", "text"]]) - if True: + for i,ref in refs.iterrows(): - """ - print(tabulate(nlpres["word-tokens"]["data"], - headers=nlpres["word-tokens"]["headers"])) - """ + if ref["confidence"]>0.95 and len(ref["text"])>32: + item = {"training-sample": training_sample, "text": ref["text"]} + fw.write(json.dumps(item)+"\n") - """ - print(tabulate(nlpres["properties"]["data"], - headers=nlpres["properties"]["headers"])) - """ - - tind = nlpres["properties"]["headers"].index("type") - lind = nlpres["properties"]["headers"].index("label") - cind = nlpres["properties"]["headers"].index("confidence") + #input("continue ...") + + """ + for item in odoc["texts"]: - found=False - for i,row in enumerate(nlpres["properties"]["data"]): - if row[tind]=="semantic": - row[lind] = label - row[cind] = 1.0 + if "properties" not in item: + continue + - found = True + if (df[df["type"]=="semantic"]["label"]=="reference").bool(): + #print(item["text"]) - if not found: - nlpres["properties"]["data"].append(["semantic", label, 1.0]) - found = True + total += 1 if random.random()<0.9: - nlpres["training-sample"] = True + training_sample = True else: - nlpres["training-sample"] = False - - if found: - fws.write(json.dumps(nlpres)+"\n") - - if cnt_ref>0: - print(tc.green(f"{filename}: {cnt_ref}")) - else: - print(tc.yellow(f"{filename}: {cnt_ref}")) + training_sample = False - fws.close() - fwr.close() + item = {"training-sample": training_sample, "text": item["text"]} + fw.write(json.dumps(item)+"\n") + """ + + fw.close() - print(f"semantic-classification dumped in {sfile}") - print(f"references dumped in {rfile}") + print("#-items: ", total) -def parse_with_anystyle_api(tlines): +def parse_with_anystyle_api(refs): time.sleep(1) tmpfile = "tmp.json" payload = { "input": [] } - for tline in tlines: - payload["input"].append(tline[1]) + for ref in refs: + payload["input"].append(ref["text"]) anystyle_token = '9fEhg+39p0J60Bs+WTTwTMcqqTFAUYoyjLlp8nEys4wnfgACn0IoqravX8Exsx/+2q1p4sU7636DR22xUeneLg==' anystyle_session = '9GFKMlFoJwbMV6W1Z37YFsG9nbXLqmGicXVzL4r5mn4SqTLcf0revMMFvAjfxcjqR8YBnj2M0fgTWBW12kK1KMFcOgZvZnwQv5lZZ3PQgPP9sait9WgoDR72BHqRpbPe0c1B6%2BNFtYE7aqpugLsTupqBuj%2B%2Fef0tbyd84wC61GkVA9Vtz2nSNC90hDliCre%2BZ2gQUc6runu6yt1M4xa0F8kM4Cxt2pN92XB8hRusqGNfsaCsw5JKdU%2FcDFtdh%2BYDSEBz6DjQFfJq81%2FTI%2F4ulku7mlv73vOC7ew%3D--o%2B2gjgNJqgCjYf4V--3mSN%2FKmNt68WTJsxBh9Bww%3D%3D' @@ -281,9 +196,12 @@ def parse_with_anystyle_api(tlines): return [] -def update_references(refs, tlines): +def update_references(refs, label_map): + + results = parse_with_anystyle_api(refs) - results = parse_with_anystyle_api(tlines) + if len(results)!=len(refs): + return for j,item in enumerate(results): @@ -292,7 +210,8 @@ def update_references(refs, tlines): parts.append(row[1]) text = " ".join(parts) - if text!=tlines[j][1]: + if text!=refs[j]["text"]: + print("WARNING: mismatch text") continue beg=0 @@ -305,11 +224,9 @@ def update_references(refs, tlines): beg += charlen beg += 1 - ind = tlines[j][0] - - refs[ind]["word-tokens"]["headers"].append("true-label") + refs[j]["word_tokens"]["headers"].append("true-label") - for ri,row_i in enumerate(refs[ind]["word-tokens"]["data"]): + for ri,row_i in enumerate(refs[j]["word_tokens"]["data"]): label="__undef__" for rj,row_j in enumerate(item): @@ -317,24 +234,57 @@ def update_references(refs, tlines): label = row_j[0] break - refs[ind]["word-tokens"]["data"][ri].append(label) + if label in label_map: + label = label_map[label] + else: + ##print(label) + label = "null" + + refs[j]["word_tokens"]["data"][ri].append(label) """ - print(tabulate(refs[ind]["word-tokens"]["data"], - headers=refs[ind]["word-tokens"]["headers"])) + print(text) + print("\n\n", tabulate(refs[j]["word_tokens"]["data"], + headers=refs[j]["word_tokens"]["headers"])) """ - refs[ind]["annotated"]=True - - tlines=[] + refs[j]["annotated"]=True + +def annotate(rfile, ofile, max_items): + + label_map = { + "author": "authors", + "title": "title", + "container-title": "conference", + "journal": "journal", + "date": "date", + "volume": "volume", + "pages": "pages", + "citation-number": "reference-number", + "note": "note", + "url": "url", + "doi": "doi", + "isbn": "isbn", + "publisher": "publisher" + } + + nlp_model = init_nlp_model("semantic", filters=["properties", "word_tokens"]) -def annotate(rfile, ofile): + num_lines = sum(1 for _ in open(rfile)) + if max_items!=-1: + max_items = min(max_items, num_lines) + else: + max_items = num_lines + refs=[] fr = open(rfile, "r") + fw = open(ofile, "w") - while True: + cnt = 0 + + for i in tqdm.tqdm(range(0,max_items)): line = fr.readline().strip() if line==None or len(line)==0: @@ -342,37 +292,32 @@ def annotate(rfile, ofile): try: item = json.loads(line) - refs.append(item) + ref = nlp_model.apply_on_text(item["text"]) + + ref["training-sample"] = item["training-sample"] + + refs.append(ref) + cnt += 1 except: continue - fr.close() + if len(refs)>=16: - print("#-refs: ", len(refs)) - - tlines=[] - for ind,ref in enumerate(refs): + #print(f"\rreference-annotation: {cnt}/{num_lines}", end="") + update_references(refs, label_map) - print(f"\rreferennce-annotation: {ind}/{len(refs)}", end="") - - refs[ind]["annotated"]=False - tlines.append([ind, ref["text"]]) - - if len(tlines)>0 and len(tlines)%16==0: - update_references(refs, tlines) - tlines=[] + for ref in refs: + if "annotated" in ref and ref["annotated"]: + fw.write(json.dumps(ref)+"\n") - print(" --> done") - - if len(tlines)>0: - update_references(refs, tlines) - - fw = open(ofile, "w") + refs=[] - for ref in refs: - if "annotated" in ref and ref["annotated"]: - fw.write(json.dumps(ref)+"\n") + #if max_items!=-1 and cnt>max_items: + # break + + print(" --> done") + fr.close() fw.close() print(f"writing annotation to {ofile}") @@ -398,7 +343,7 @@ def prepare_for_crf(afile): except: continue - wt = item["word-tokens"] + wt = item["word_tokens"] if item["annotated"]: @@ -566,31 +511,33 @@ def train_fst(train_file, model_file, metrics_file): model.train(config) -if __name__ == '__main__': +def create_reference_model(mode:str, idir:str, odir:str, max_items:int=-1): - mode, pdf_files, json_files, tdir = parse_arguments() + json_files = glob.glob(os.path.join(idir, "*.json")) + print("#-docs: ", len(json_files)) - if len(pdf_files)>0: - new_json_files = convert_pdffiles(pdf_files, force=False) - - for _ in new_json_files: - json_files.append(_) + sfile = os.path.join(odir, "nlp-references.data.jsonl") + afile = os.path.join(odir, "nlp-references.annot.jsonl") - json_files = sorted(list(set(json_files))) + crf_model_file = os.path.join(odir, "crf_reference") + crf_metrics_file = crf_model_file+".metrics.txt" - sfile = os.path.join(tdir, "nlp-train-semantic-classification.annot.jsonl") + """ rfile = os.path.join(tdir, "nlp-train-references-crf.jsonl") - afile = os.path.join(tdir, "nlp-train-references-crf.annot.jsonl") - - crf_model_file = os.path.join(tdir, "crf_reference") - fst_model_file = os.path.join(tdir, "fst_sematic") + fst_model_file = os.path.join(tdir, "fst_sematic") + """ + if mode=="extract" or mode=="all": - extract_references(json_files, sfile, rfile) + extract_references(json_files, sfile, max_items) if mode=="annotate" or mode=="all": - annotate(rfile, afile) + annotate(sfile, afile, max_items) + + if mode=="train" or mode=="all": + train_crf(afile, crf_model_file, crf_metrics_file) + """ if "classify" in mode or mode=="all": if mode=="classify" or mode==all: @@ -604,4 +551,10 @@ def train_fst(train_file, model_file, metrics_file): prepare_for_crf(afile) train_crf(afile, crf_model_file, crf_model_file+".metrics.txt") - + """ + +if __name__ == '__main__': + + mode, idir, odir, max_items = parse_arguments() + + create_reference_model(mode, idir, odir, max_items) diff --git a/deepsearch_glm/nlp_train_semantic.py b/deepsearch_glm/nlp_train_semantic.py index 69ca3fdb..bc3569e4 100644 --- a/deepsearch_glm/nlp_train_semantic.py +++ b/deepsearch_glm/nlp_train_semantic.py @@ -43,7 +43,7 @@ def parse_arguments(): choices=["retrieve","prepare","process","train","eval","refine","all"]) parser.add_argument('--input-dir', required=False, - type=str, default=None, + type=str, default="./semantic-models/documents", help="input directory with documents") parser.add_argument('--output-dir', required=False, @@ -72,13 +72,28 @@ def retrieve_data_pubmed(sdir): os.mkdir(sdir) index="pubmed" - query="*" + query="description.publication_date:[2022-01-01 TO 2022-03-01]" odir = ds_index_query(index, query, tdir, sources=["_name", "file-info", "references", "description"], force=True, limit=1000) return odir +def retrieve_data_arxiv(sdir): + + tdir = os.path.join(sdir, "arxiv") + + if not os.path.exists(sdir): + os.mkdir(sdir) + + index="arxiv" + query="description.publication_date:[2022-01-01 TO 2022-03-01]" + + odir = ds_index_query(index, query, tdir, sources=["_name", "file-info", "description", "main-text"], + force=True, limit=50000) + + return odir + def retrieve_data(sdir, index): tdir = os.path.join(sdir, index) @@ -89,7 +104,7 @@ def retrieve_data(sdir, index): query="*" odir = ds_index_query(index, query, tdir, sources=["_name", "file-info", "description", "main-text"], - force=True, limit=1000) + force=True, limit=50000) return odir @@ -101,50 +116,153 @@ def get_data(): for item in doc["references"]: data.append({"label":"reference", "text":item["text"], "document-hash":dhash}) - if "description" in doc: - desc = doc["description"] + """ + +def prepare_data_from_legacy_documents(doc): + + if "file-info" in doc: + dhash = doc["file-info"]["document-hash"] + else: + dhash = -1 + + N = len(doc["main-text"]) + + title_ind=len(doc["main-text"]) + + abs_beg=len(doc["main-text"]) + intro_beg=len(doc["main-text"]) + + ref_beg=len(doc["main-text"]) + ref_end=len(doc["main-text"]) + + data=[] + for i,item in enumerate(doc["main-text"]): + + if "text" not in item: + continue + + label = item["type"].lower() + text = item["text"].lower().strip() + + if "title" == label and title_ind==N: + title_ind=i + + if ("title" in label) and ("abstract" in text) and abs_beg==N: + abs_beg=i - if "title" in desc: - #data.append({"label":"title", "text":desc["title"], "document-hash":dhash}) - data.append({"label":"text", "text":desc["title"], "document-hash":dhash}) + if (text.startswith("abstract")) and abs_beg==N: + abs_beg=i - if "abstract" in desc: - for item in desc["abstract"]: - data.append({"label":"text", "text":item, "document-hash":dhash}) - - affiliations=[] - if "affiliations" in desc: - for item in desc["affiliations"]: - affiliations.append(item["name"]) - #data.append({"label":"affiliation", "text":item["name"], "document-hash":dhash}) - data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) - - authors=[] - if "authors" in desc: - for item in desc["authors"]: - authors.append(item["name"]) - #data.append({"label":"person_name", "text":item["name"], "document-hash":dhash}) - data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) - - if len(authors)>1: - data.append({"label":"meta-data", "text": ", ".join(authors), "document-hash":dhash}) - data.append({"label":"meta-data", "text": "; ".join(authors), "document-hash":dhash}) - - if len(affiliations)>1: - data.append({"label":"meta-data", "text": ", ".join(affiliations), "document-hash":dhash}) - data.append({"label":"meta-data", "text": "; ".join(affiliations), "document-hash":dhash}) + if ("title" in label) and ("introduction" in text) and intro_beg==N: + intro_beg=i + + if ("title" in label) and ("references" in text) and ref_beg==N: + ref_beg=i + + #(("title" in label) or ("caption" in label)) and ("reference" not in text): + if (ref_end==N and ref_begref_beg and + (("title" in label)) and ("reference" not in text)): + ref_end=i + + if title_ind==N or abs_beg==N or ref_beg==N: + return data + + print(dhash) + for i,item in enumerate(doc["main-text"]): + + if "text" not in item: + continue + + type_ = item["type"] + label = item["type"] + text = item["text"] + + skip = ((len(text)<=1) or (len(text.split(" "))==1)) and ("title" not in label) and (len(text)<=5) + if skip: + #print(f"skipping: {text}") + continue + + if title_ind=1 and len(affiliations)>=1: + if random.random()<0.9: + training_sample = True + else: + training_sample = False + + data.append({"document-hash":dhash, "label":label, "text":item["text"], "training-sample": training_sample}) - for _ in authors: - for __ in affiliations: - data.append({"label":"meta-data", "text": " ".join([_, __]), "document-hash":dhash}) - """ + return data + +def prepare_data_from_description(doc): + + if "file-info" in doc: + dhash = doc["file-info"]["document-hash"] + else: + dhash = -1 + data=[] + + if "references" in doc: + for item in doc["references"]: + data.append({"label":"reference", "text":item["text"], "document-hash":dhash}) -def prepare_data(json_files, data_file): + if "description" in doc: + + desc = doc["description"] + if "title" in desc: + data.append({"label":"text", "text":desc["title"], "document-hash":dhash}) + + if "abstract" in desc: + for item in desc["abstract"]: + data.append({"label":"text", "text":item, "document-hash":dhash}) + + affiliations=[] + if "affiliations" in desc: + for item in desc["affiliations"]: + affiliations.append(item["name"]) + data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) + + authors=[] + if "authors" in desc: + for item in desc["authors"]: + authors.append(item["name"]) + data.append({"label":"meta-data", "text":item["name"], "document-hash":dhash}) + + if len(authors)>1: + data.append({"label":"meta-data", "text": ", ".join(authors), "document-hash":dhash}) + + if len(affiliations)>1: + data.append({"label":"meta-data", "text": ", ".join(affiliations), "document-hash":dhash}) + + if len(authors)>=1 and len(affiliations)>=1: + for _ in authors: + for __ in affiliations: + data.append({"label":"meta-data", "text": " ".join([_, __]), "document-hash":dhash}) + + return data + +def prepare_data(json_files, data_file): + num_lines=0 fw = open(data_file, "w") @@ -159,92 +277,12 @@ def prepare_data(json_files, data_file): except: continue - if "file-info" in doc: - dhash = doc["file-info"]["document-hash"] - else: - dhash = -1 - if "main-text" in doc: - - N = len(doc["main-text"]) - - title_ind=len(doc["main-text"]) - - abs_beg=len(doc["main-text"]) - intro_beg=len(doc["main-text"]) - - ref_beg=len(doc["main-text"]) - ref_end=len(doc["main-text"]) + data = prepare_data_from_legacy_documents(doc) + #continue + else: + data = prepare_data_from_description(doc) - for i,item in enumerate(doc["main-text"]): - - if "text" not in item: - continue - - label = item["type"].lower() - text = item["text"].lower().strip() - - if "title" == label and title_ind==N: - title_ind=i - - if ("title" in label) and ("abstract" in text) and abs_beg==N: - abs_beg=i - - if ("title" in label) and ("introduction" in text) and intro_beg==N: - intro_beg=i - - if ("title" in label) and ("reference" in text) and ref_beg==N: - ref_beg=i - - if ref_end==N and ref_begref_beg and (("title" in label) or ("caption" in label)) and ("reference" not in text): - ref_end=i - - if title_ind==N or abs_beg==N or ref_beg==N: - continue - - for i,item in enumerate(doc["main-text"]): - - if "text" not in item: - continue - - type_ = item["type"] - label = item["type"] - text = item["text"] - - skip = ((len(text)<=1) or (len(text.split(" "))==1)) and ("title" not in label) and (len(text)<=5) - if skip: - #print(f"skipping: {text}") - continue - - if title_ind"] license = "MIT" diff --git a/src/andromeda/enums/structs.h b/src/andromeda/enums/structs.h index 208b6d19..6b99a2dc 100644 --- a/src/andromeda/enums/structs.h +++ b/src/andromeda/enums/structs.h @@ -5,21 +5,21 @@ namespace andromeda { - enum subject_name { UNDEF, - //TEXT, - PROMPT, - //PARAGRAPH, TABLE, FIGURE, - TEXT, TABLE, FIGURE, - DOCUMENT}; + enum subject_name { UNDEF=-1, + PROMPT=0, + DOCUMENT=1, + TEXT=2, + TABLE=3, + FIGURE=4}; const static std::vector SUBJECT_NAMES = { UNDEF, - //TEXT, PROMPT, - //PARAGRAPH, TABLE, FIGURE, - TEXT, TABLE, FIGURE, - DOCUMENT + DOCUMENT, + TEXT, + TABLE, + FIGURE }; std::string to_string(subject_name name) @@ -29,13 +29,12 @@ namespace andromeda case UNDEF: return "UNDEF"; case PROMPT: return "PROMPT"; + + case DOCUMENT: return "DOCUMENT"; - //case PARAGRAPH: return "PARAGRAPH"; case TEXT: return "TEXT"; case TABLE: return "TABLE"; case FIGURE: return "FIGURE"; - - case DOCUMENT: return "DOCUMENT"; } return "UNKNOWN_SUBJECT"; diff --git a/src/andromeda/glm/model_cli/create/model_creator.h b/src/andromeda/glm/model_cli/create/model_creator.h index 0cc28999..13f3f621 100644 --- a/src/andromeda/glm/model_cli/create/model_creator.h +++ b/src/andromeda/glm/model_cli/create/model_creator.h @@ -221,10 +221,10 @@ namespace andromeda text_node = nodes.insert(text_node, false); text_hash = text_node.get_hash(); - LOG_S(INFO) << "inserted node: " << doc_path; + //LOG_S(INFO) << "inserted node: " << doc_path; } - std::vector& tokens = subj.word_tokens; + std::vector& tokens = subj.get_word_tokens(); std::vector& instances = subj.instances; std::vector& relations = subj.relations; @@ -376,7 +376,7 @@ namespace andromeda { continue; } - std::vector& tokens = subj(i,j).word_tokens; + std::vector& tokens = subj(i,j).get_word_tokens(); //LOG_S(INFO) << "(i, j): " << i << ", " << j; //LOG_S(INFO) << andromeda::tabulate(tokens, subj(i,j).text); @@ -393,16 +393,16 @@ namespace andromeda for(auto itr=subj.insts_beg({i,j}); itr!=subj.insts_end({i,j}); itr++) { - assert(i==(itr->coor)[0]); - assert(j==(itr->coor)[1]); + assert(i==itr->get_coor(0)); + assert(j==itr->get_coor(1)); const base_instance& inst = *itr; //LOG_S(INFO) << "inst: " << inst.to_json().dump(); - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); - if(inst.model_type==andromeda::TERM and - inst.model_subtype=="single-term") + if(inst.is_model(TERM) and + inst.is_subtype("single-term")) { std::vector term_hashes={}; for(std::size_t i=rng[0]; i sent_rngs={}; for(auto& inst:instances) { - if(inst.model_type==andromeda::SENTENCE) + if(inst.is_model(SENTENCE)) { - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); sent_rngs.insert(rng); } } @@ -725,12 +726,12 @@ namespace andromeda { for(auto& inst:instances) { - if(inst.model_type==andromeda::TERM) + if(inst.is_model(TERM)) { nodes.get(beg_term_hash).incr_word_cnt();// += 1; nodes.get(end_term_hash).incr_word_cnt();// += 1; - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); edges.insert(edge_names::to_label, tok_hashes.at(rng[0] ), beg_term_hash, false); edges.insert(edge_names::to_label, tok_hashes.at(rng[1]-1), end_term_hash, false); @@ -741,12 +742,12 @@ namespace andromeda edges.insert(edge_names::tax_up, end_term_hash, tok_hashes.at(rng[1]-1), false); } - if(inst.model_type==andromeda::SENTENCE) + if(inst.is_model(SENTENCE)) { nodes.get(beg_sent_hash).incr_word_cnt();// += 1; nodes.get(end_sent_hash).incr_word_cnt();// += 1; - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); edges.insert(edge_names::to_label, tok_hashes.at(rng[0] ), beg_sent_hash, false); edges.insert(edge_names::to_label, tok_hashes.at(rng[1]-1), end_sent_hash, false); @@ -779,15 +780,15 @@ namespace andromeda { for(auto& inst:instances) { - if(inst.model_type==andromeda::EXPRESSION and - (inst.model_subtype=="name-concatenation" or - inst.model_subtype=="word-concatenation" or - inst.model_subtype=="latex-concatenation") and - inst.name.find("-")!=std::string::npos and - inst.name.find(" ")==std::string::npos and - (inst.wtok_range[1]-inst.wtok_range[0])==1) + if(inst.is_model(EXPRESSION) and + (inst.is_subtype("name-concatenation") or + inst.is_subtype("word-concatenation") or + inst.is_subtype("latex-concatenation")) and + inst.get_name().find("-")!=std::string::npos and + inst.get_name().find(" ")==std::string::npos and + (inst.get_wtok_range(1)-inst.get_wtok_range(0))==1) { - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); hash_type hash = tok_hashes.at(rng[0]); auto& node = nodes.get(hash); @@ -808,7 +809,7 @@ namespace andromeda base_node path(node_names::CONT, cont_hashes); nodes.insert(path, false); - rng_to_hash.emplace(inst.wtok_range, path.get_hash()); + rng_to_hash.emplace(inst.get_wtok_range(), path.get_hash()); for(std::size_t i=0; i hashes={}; for(std::size_t i=rng[0]; i term_hashes={}; for(std::size_t i=rng[0]; i verb_hashes={}; std::vector pos={}; @@ -994,7 +995,7 @@ namespace andromeda base_node path(node_names::VERB, verb_hashes); nodes.insert(path, false); - rng_to_verb.emplace(inst.wtok_range, path.get_hash()); + rng_to_verb.emplace(inst.get_wtok_range(), path.get_hash()); for(std::size_t i=0; i path_hashes={}; - auto rng = inst.wtok_range; + auto rng = inst.get_wtok_range(); for(index_type l=rng[0]; l::preprocess(const subject& subj, std::string& text) { - text = subj.text; + text = subj.get_text(); return true; } @@ -75,7 +75,7 @@ namespace andromeda auto& row = subj.data.at(i); for(std::size_t j=0; j::apply(subject& subj) { if(not satisfies_dependencies(subj)) @@ -141,10 +142,12 @@ namespace andromeda { this->apply(*para); - base_property prop("null", "null", 0.0); + base_property prop(para->get_hash(), TEXT, para->get_sref(), + "null", "null", 0.0); + if(get(*para, prop)) { - std::string key = prop.get_name(); + std::string key = prop.get_label(); std::size_t dst = para->dst; if(lang_mapping.count(key)==1) @@ -160,19 +163,20 @@ namespace andromeda } } - base_property prop(this->get_key(), "null", 0.0); + base_property prop(subj.get_hash(), DOCUMENT, "#", + this->get_key(), "null", 0.0); for(auto itr=lang_mapping.begin(); itr!=lang_mapping.end(); itr++) { double confidence = std::round(1000*(itr->second)/(0.0+total))/1000.0; if(itr==lang_mapping.begin()) { - prop.set_name(itr->first); + prop.set_label(itr->first); prop.set_conf(confidence); } else if(prop.get_conf()first); + prop.set_label(itr->first); prop.set_conf(confidence); } else @@ -183,6 +187,76 @@ namespace andromeda return update_applied_models(subj); } + */ + + bool nlp_model::apply(subject& subj) + { + if(not satisfies_dependencies(subj)) + { + return false; + } + + std::string text="", label="null"; + double conf=0.0; + + std::map lang_mapping; + std::size_t total=0; + + for(uint64_t ind=0; indget_len(); + total += para->get_len(); + } + else + { + lang_mapping[label] = para->get_len(); + total += para->get_len(); + } + } + + para->properties.emplace_back(para->get_hash(), TEXT, para->get_self_ref(), + get_name(), label, conf); + para->applied_models.insert(get_key()); + } + + base_property prop(subj.get_hash(), DOCUMENT, "#", + get_name(), "null", 0.0); + for(auto itr=lang_mapping.begin(); itr!=lang_mapping.end(); itr++) + { + double confidence = std::round(1000*(itr->second)/(0.0+total))/1000.0; + + if(itr==lang_mapping.begin()) + { + prop.set_label(itr->first); + prop.set_conf(confidence); + } + else if(prop.get_conf()first); + prop.set_conf(confidence); + } + else + {} + } + subj.properties.push_back(prop); + + return update_applied_models(subj); + } } diff --git a/src/andromeda/nlp/cls/semantic.h b/src/andromeda/nlp/cls/semantic.h index cc49b758..c3a5c23e 100644 --- a/src/andromeda/nlp/cls/semantic.h +++ b/src/andromeda/nlp/cls/semantic.h @@ -15,8 +15,8 @@ namespace andromeda 4. text The goal is to use the semantic labels downstream to extract meta-data - items and parse the references. - */ + items and parse the references. + */ template<> class nlp_model: public fasttext_supervised_model { @@ -25,29 +25,29 @@ namespace andromeda const static inline std::set known_headers = {"abstract", "introduction", "references", "conclusion"}; - + public: nlp_model(); nlp_model(std::filesystem::path resources_dir); - + ~nlp_model(); virtual std::set get_dependencies() { return dependencies; } - + virtual model_type get_type() { return CLS; } virtual model_name get_name() { return SEMANTIC; } - - template + + template bool get(subject_type& subj, base_property& prop); - + virtual bool preprocess(const subject& subj, std::string& text); virtual bool preprocess(const subject& subj, std::string& text); virtual bool apply(subject& subj); virtual bool apply(subject
& subj); virtual bool apply(subject& subj); - + private: void initialise(); @@ -56,26 +56,26 @@ namespace andromeda void initialise_model(); void get_semantic_mapping(); - + private: const static std::set dependencies; //std::filesystem::path resources_dir; - std::filesystem::path model_file; - + std::filesystem::path model_file; + std::vector author_list, authors; //std::vector table_refs, figure_refs; std::vector caption_refs; }; - const std::set nlp_model::dependencies = {LINK,NUMVAL}; + const std::set nlp_model::dependencies = {LINK, NUMVAL}; nlp_model::nlp_model(): fasttext_supervised_model(), model_file(glm_variables::get_fst_dir() / "semantic/fst_semantic.bin") { - initialise(); + initialise(); } nlp_model::~nlp_model() @@ -87,74 +87,74 @@ namespace andromeda initialise_model(); } - + void nlp_model::initialise_regex() { - // Yinhan Liu , Myle Ott , Naman Goyal , J . S . - A . Du , Mandar Joshi , Danqi Chen , Omer Levy , Mike Lewis , Luke Zettlemoyer , and Veselin Stoyanov + // Yinhan Liu , Myle Ott , Naman Goyal , J . S . - A . Du , Mandar Joshi , Danqi Chen , Omer Levy , Mike Lewis , Luke Zettlemoyer , and Veselin Stoyanov { std::string authors_str = R"(((?P(([A-Z][a-z]+\s)(([A-Z][a-z]+|[A-Z]\s\.|\-|\')\s)*([A-Z][a-z]+)))\s((\,|and|\&)\s)+)+(?P(([A-Z][a-z]+\s)(([A-Z][a-z]+|[A-Z]\s\.|\-|\')\s)*([A-Z][a-z]+))))"; - + pcre2_expr expr(this->get_key(), "__author_list__", authors_str); author_list.push_back(expr); } - // Y . Liu , M . Ott , N . Goyal , J . S . - A . Du , M . Joshi , D . Chen , O . Levy , M . Lewis , L . Zettlemoyer + // Y . Liu , M . Ott , N . Goyal , J . S . - A . Du , M . Joshi , D . Chen , O . Levy , M . Lewis , L . Zettlemoyer { std::string authors_str = R"(((?P((([A-Z]\s\.|\-)\s)+([A-Z][a-z]+)))\s((\,|\&|and)\s)+)+(?P((([A-Z]\s\.|\-)\s)+([A-Z][a-z]+))))"; pcre2_expr expr(this->get_key(), "__author_list__", authors_str); author_list.push_back(expr); } - + // __ival__ . Srivastava , R . - K . , Greff , K . & Schmidhuber , J . Highway networks . CoRR e - prints ( __year__ ) . arXiv : __fval__ . { std::string authors_str = R"((((?P(([A-Z][a-z]+\s)(\,\s)(([A-Z]\s\.|\-)\s)+)))((\,|\&|and)\s))+(?P(([A-Z][a-z]+\s)(\,\s)(([A-Z]\s\.|\-)\s)+)))"; pcre2_expr expr(this->get_key(), "__author_list__", authors_str); author_list.push_back(expr); - } + } { pcre2_expr expr(this->get_key(), "__author__", - R"((?P([A-Z][a-z]+)\s\,(\s[A-Z\-]\s\.)+)\s(\,|and|\&))"); + R"((?P([A-Z][a-z]+)\s\,(\s[A-Z\-]\s\.)+)\s(\,|and|\&))"); authors.push_back(expr); } { pcre2_expr expr(this->get_key(), "__author__", - R"((and|\&)\s(?P([A-Z][a-z]+)\s\,\s([A-Z\-]\s\.)+)\s)"); + R"((and|\&)\s(?P([A-Z][a-z]+)\s\,\s([A-Z\-]\s\.)+)\s)"); authors.push_back(expr); } { pcre2_expr expr(this->get_key(), "__author__", - R"((?P((\s[A-Z\-]\s\.)+\s([A-Z][a-z]+)))\s(\,|and|\&)+)"); + R"((?P((\s[A-Z\-]\s\.)+\s([A-Z][a-z]+)))\s(\,|and|\&)+)"); authors.push_back(expr); } - + { pcre2_expr expr(this->get_key(), "__table__", - R"(^(?P
Table|TABLE|Tab|TAB)(\s*\.)?(\s*)(?P(__(i|f)val__|[A-Z])))"); + R"(^(?P
Table|TABLE|Tab|TAB)(\s*\.)?(\s*)(?P(__(i|f)val__|[A-Z])))"); caption_refs.push_back(expr); } { pcre2_expr expr(this->get_key(), "__table__", - R"(^(?P
Table|TABLE|Tab|TAB))"); + R"(^(?P
Table|TABLE|Tab|TAB))"); caption_refs.push_back(expr); } { pcre2_expr expr(this->get_key(), "__figure__", - R"(^(?P
(Figure|FIGURE|Fig|FIG))(\s*\.)?(\s*)(?P(__(i|f)val__|[A-Z])))"); + R"(^(?P
(Figure|FIGURE|Fig|FIG))(\s*\.)?(\s*)(?P(__(i|f)val__|[A-Z])))"); caption_refs.push_back(expr); } { pcre2_expr expr(this->get_key(), "__figure__", - R"(^(?P
(Figure|FIGURE|Fig|FIG))(\s*\.)?)"); + R"(^(?P
(Figure|FIGURE|Fig|FIG))(\s*\.)?)"); caption_refs.push_back(expr); - } + } } void nlp_model::initialise_model() @@ -163,107 +163,88 @@ namespace andromeda if(not fasttext_supervised_model::load(model_file)) { - LOG_S(FATAL) << "could not load semantic model ..."; + LOG_S(FATAL) << "could not load semantic model ..."; } } - template + template bool nlp_model::get(subject_type& subj, base_property& property) { for(auto& prop:subj.properties) { - if(prop.get_type()==get_key()) - { - property = prop; - return true; - } + if(prop.get_type()==get_key()) + { + property = prop; + return true; + } } - + return false; } bool nlp_model::preprocess(const subject& subj, std::string& text) { - auto& wtokens = subj.word_tokens; + //auto& wtokens = subj.get_word_tokens(); - if(wtokens.size()==0) + //if(wtokens.size()==0) + if(subj.get_num_wtokens()==0) { - ///LOG_S(WARNING) << "word-tokens have not been set"; - - text.clear(); - return false; - } - - std::stringstream ss; - - std::size_t MAXLEN = 256; - for(std::size_t l=0; l0) - { - ss << "__" << *(tags.begin()) << "__"; - } - else - { - std::string text = token.get_word(); - ss << text; - } - - ss << " "; - } - - text = ss.str(); - //LOG_S(INFO) << __FUNCTION__ << " orig: " << text; - - /* - for(auto& expr:author_list) - { - std::vector items; - expr.find_all(text, items); - - for(auto& item:items) - { - text = utils::replace(text, item.text, "__author_list__"); - } + text.clear(); + return false; } - if(text.find("__author_list__")==std::string::npos) - { - for(auto& expr:authors) - { - std::vector items; - expr.find_all(text, items); - - for(auto& item:items) - { - for(auto& grp:item.groups) - { - if(grp.group_name=="author") - { - text = utils::replace(text, grp.text, "__author__"); - } - } - } - } - } + { + text = subj.get_text(); + + std::vector insts={}; + for(auto inst:subj.instances) + { + if(inst.is_model(NUMVAL) or inst.is_model(LINK)) + { + insts.push_back(inst); + } + } + + if(insts.size()>0) + { + std::sort(insts.begin(), insts.end()); + + std::size_t l=0; + std::stringstream ss; + + for(std::size_t i=0; i=256) + { + text = text.substr(0, 256); + } + } + } - for(auto& expr:caption_refs) - { - std::vector items; - expr.find_all(text, items); + //text = utils::to_lower(text); - for(auto& item:items) - { - text = utils::replace(text, item.text, "__caption_ref__"); - } - } - */ - - text = utils::to_lower(text); - return true; } @@ -272,11 +253,11 @@ namespace andromeda std::stringstream ss; for(std::size_t i=0; i::apply(subject& subj) - { - auto text = utils::to_lower(subj.text); + { + std::string text="", label="null"; + double conf=0.0; - if(known_headers.count(text)) + if(not preprocess(subj, text)) { - subj.properties.emplace_back(get_key(), "meta-data", 1.0); - return true; + return false; //continue; // skip continue; // skip } - else + + if(not classify(text, label, conf)) { - return fasttext_supervised_model::classify(subj); + return false; //continue; // skip } + + //LOG_S(INFO) << label << ", " << conf << ": " << text.substr(0, 64); + + subj.properties.emplace_back(subj.get_hash(), TEXT, subj.get_self_ref(), + get_name(), label, conf); + subj.applied_models.insert(get_key()); + + return true; } bool nlp_model::apply(subject
& subj) @@ -311,65 +301,27 @@ namespace andromeda return false; } - uint64_t abs_ind=-1, intro_ind=-1, ref_ind=-1; for(uint64_t ind=0; indget_text(); - std::string ltext = utils::to_lower(otext); - - if(abs_ind==-1 and ltext.find("abstract")!=std::string::npos) - { - abs_ind = ind; - } - - if(intro_ind==-1 and ltext.find("introduction")!=std::string::npos) - { - intro_ind = ind; - } - - if(ref_ind==-1 and ltext.find("reference")!=std::string::npos) - { - ref_ind = ind; - } - } + auto& para = subj.texts.at(ind); - std::string text="", label="null"; - double conf=0.0; + this->apply(*para); + } - for(uint64_t ind=0; indproperties.emplace_back(key, label, conf); - para->applied_models.insert(key); + label = "meta-data"; } - + else if(ref_ind!=-1 and ind::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { @@ -120,7 +120,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), CITE, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/expression.h b/src/andromeda/nlp/ent/expression.h index c6a8f3da..a80b1540 100644 --- a/src/andromeda/nlp/ent/expression.h +++ b/src/andromeda/nlp/ent/expression.h @@ -24,7 +24,7 @@ namespace andromeda virtual model_name get_name() { return EXPRESSION; } virtual bool apply(subject& subj); - virtual bool apply(subject
& subj); + virtual bool apply_on_table_data(subject
& subj); private: @@ -307,7 +307,7 @@ namespace andromeda return true; } - bool nlp_model::apply(subject
& subj) + bool nlp_model::apply_on_table_data(subject
& subj) { if(not satisfies_dependencies(subj)) { @@ -340,7 +340,8 @@ namespace andromeda post_process(subj); - subj.contract_wtokens_from_instances(EXPRESSION); + // FIXME not sure ... + //subj.contract_wtokens_from_instances(EXPRESSION); //subj.show(false, false, false, true, false, true, false); @@ -355,17 +356,20 @@ namespace andromeda apply_abbr_regex(subj); - subj.contract_wtokens_from_instances(EXPRESSION); + // FIXME not sure ... + //subj.contract_wtokens_from_instances(EXPRESSION); for(auto& ent:subj.instances) { - if(ent.model_type==EXPRESSION and ent.model_subtype=="common" and ent.wtoken_len()==1) + if(ent.is_model(EXPRESSION) and ent.is_subtype("common") and ent.wtoken_len()==1) { - subj.word_tokens.at(ent.wtok_range[0]).set_word(ent.name); + //subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); + subj.set_word(ent.get_wtok_range(0), ent.get_name()); } - else if(ent.model_type==EXPRESSION and ent.model_subtype=="apostrophe" and ent.wtoken_len()==1) + else if(ent.is_model(EXPRESSION) and ent.is_subtype("apostrophe") and ent.wtoken_len()==1) { - subj.word_tokens.at(ent.wtok_range[0]).set_word(ent.name); + //subj.word_tokens.at(ent.get_wtok_range(0)).set_word(ent.get_name()); + subj.set_word(ent.get_wtok_range(0), ent.get_name()); } else {} @@ -376,8 +380,8 @@ namespace andromeda bool nlp_model::apply_common_regex(subject& subj) { - //std::string orig = subj.text; - std::string text = subj.text; + //std::string orig = subj.get_text(); + std::string text = subj.get_text(); //std::size_t max_id = subj.get_max_ent_hash(); @@ -413,7 +417,7 @@ namespace andromeda } //LOG_S(INFO) << __FUNCTION__ << " " << l << ": " << orig; - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -427,7 +431,7 @@ namespace andromeda bool nlp_model::apply_apostrophe_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); //std::size_t max_id = subj.get_max_ent_hash(); @@ -456,7 +460,7 @@ namespace andromeda orig = subj.from_ctok_range(ctok_range); name = utils::replace(orig, "'", ""); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -470,7 +474,7 @@ namespace andromeda bool nlp_model::apply_abbr_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(std::size_t l=0; l::apply_concatenation_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); // find all concat expressions for(auto& expr:concat_exprs) @@ -569,7 +573,7 @@ namespace andromeda if(keep) { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -590,12 +594,12 @@ namespace andromeda { for(std::size_t j=0; j::apply_latex_regex(subject& subj) { - //std::string orig = subj.text; - std::string text = subj.text; + //std::string orig = subj.get_text(); + std::string text = subj.get_text(); for(auto& ent:subj.instances) { - if(ent.model_type==CITE) + if(ent.is_model(CITE)) { - utils::mask(text, ent.char_range); + utils::mask(text, ent.get_char_range()); } } - //std::size_t max_id = subj.get_max_ent_hash(); - // find all latex expressions bool found_new = true; while(found_new) @@ -696,7 +698,7 @@ namespace andromeda orig = subj.from_ctok_range(ctok_range); name = normalise(orig); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -716,16 +718,16 @@ namespace andromeda std::set forbidden_inds={}; for(auto& ent:subj.instances) { - if(ent.model_type==CITE) + if(ent.is_model(CITE)) { - for(std::size_t ind=ent.wtok_range[0]; ind wtoken_inds={}; for(std::size_t l=0; l::add_concatenated_expression(subject& subj, std::list wtoken_inds) { - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); std::set special_begins = {"\"", "'", "''", "{", "}", ".", ",", ";", "/"}; std::set special_endings = {".",",","?","!",":", ";", "\"", "'", "''"}; @@ -832,7 +834,7 @@ namespace andromeda { //std::size_t max_id = subj.get_max_ent_hash(); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), EXPRESSION, "wtoken-concatenation", name, orig, char_range, ctok_range, wtok_range); @@ -850,13 +852,13 @@ namespace andromeda { auto& ent = *itr; - if(ent.model_type==EXPRESSION and ent.model_subtype=="common") + if(ent.is_model(EXPRESSION) and ent.is_subtype("common")) { itr++; } - else if(ent.model_type==EXPRESSION) + else if(ent.is_model(EXPRESSION)) { - std::string orig = ent.orig; + std::string orig = ent.get_orig(); int cnt_$ = utils::count(orig, '$'); int diff_cnt_rb = utils::count_imbalance(orig, '(', ')'); @@ -864,7 +866,7 @@ namespace andromeda int diff_cnt_sb = utils::count_imbalance(orig, '[', ']'); std::vector words; - std::string text = ent.orig; + std::string text = ent.get_orig(); { while(true) @@ -892,7 +894,7 @@ namespace andromeda //LOG_S(WARNING) << ent.name << " ->" << words.size(); //for(auto word:words) //{ - //LOG_S(WARNING) << "\t ->" << word.text; + //LOG_S(WARNING) << "\t ->" << word.get_text(); //} if(orig.starts_with("(") and orig.ends_with(")")) @@ -938,14 +940,14 @@ namespace andromeda { for(auto itr_j=insts.begin(); itr_j!=insts.end(); itr_j++) { - auto cr_i = itr_i->char_range; - auto cr_j = itr_j->char_range; + auto cr_i = itr_i->get_char_range(); + auto cr_j = itr_j->get_char_range(); if(itr_i!=itr_j and - itr_i->model_type==EXPRESSION and - itr_j->model_type==EXPRESSION and + itr_i->is_model(EXPRESSION) and + itr_j->is_model(EXPRESSION) and cr_i==cr_j and - (itr_i->model_subtype)=="wtoken-concatenation") + itr_i->is_subtype("wtoken-concatenation")) { //LOG_S(INFO) << "removing: " << itr_i->orig << "; " << itr_i->name; @@ -953,8 +955,8 @@ namespace andromeda erasing=true; } if(itr_i!=itr_j and - itr_i->model_type==EXPRESSION and - itr_j->model_type==EXPRESSION and + itr_i->is_model(EXPRESSION) and + itr_j->is_model(EXPRESSION) and ((cr_j[0]<=cr_i[0] and cr_i[1]model_type==EXPRESSION and - (itr_j->model_type==NUMVAL or itr_j->model_type==NAME) and + else if(itr_i->is_model(EXPRESSION) and + (itr_j->is_model(NUMVAL) or itr_j->is_model(NAME)) and cr_i==cr_j) { //LOG_S(INFO) << "removing: " << itr_i->orig << "; " << itr_i->name; diff --git a/src/andromeda/nlp/ent/geoloc.h b/src/andromeda/nlp/ent/geoloc.h index defb36da..49ff9e77 100644 --- a/src/andromeda/nlp/ent/geoloc.h +++ b/src/andromeda/nlp/ent/geoloc.h @@ -14,37 +14,37 @@ namespace andromeda ~nlp_model(); virtual std::set get_dependencies() { return dependencies; } - + virtual model_type get_type() { return ENT; } virtual model_name get_name() { return GEOLOC; } virtual bool apply(std::string& text, nlohmann::json& annots); - + virtual bool apply(subject& subj); - virtual bool apply(subject
& subj); - + virtual bool apply_on_table_data(subject
& subj); + private: - + bool initialise(); - //bool apply_regex(subject& subj); + //bool apply_regex(subject& subj); //bool contract_regex(subject& subj); - + private: const static inline std::set allowed_subtypes={"continent", "country", "aquatic-region"}; - + const static std::set dependencies; std::vector exprs; - + std::filesystem::path asset_file; std::filesystem::path model_file; nlohmann::json assets; std::map l2s; // label to subtype (subtypes might have `-` in the name, which are not accepted as regex named groups) - + std::map h2j; // headers to column index std::map > l2inds={}; // label to row-indices @@ -52,10 +52,10 @@ namespace andromeda }; const std::set nlp_model::dependencies = {}; - + nlp_model::nlp_model(): exprs({}), - + asset_file(get_rgx_dir() / "geoloc/rgx_geoloc.json"), model_file(get_crf_dir() / "geoloc/crf_geoloc.bin"), @@ -73,16 +73,16 @@ namespace andromeda if(ifs) { - ifs >> assets; + ifs >> assets; } else { - LOG_S(ERROR) << "could not find " << asset_file; - - return false; + LOG_S(ERROR) << "could not find " << asset_file; + + return false; } - std::vector headers = {}; + std::vector headers = {}; headers = assets.value("headers", headers); auto& data = assets.at("data"); @@ -90,67 +90,67 @@ namespace andromeda h2j={}; for(index_type j=0; j(); - std::string label = utils::replace(subtype, "-", "_"); - - if(l2inds.count(label)) - { - l2inds.at(label).push_back(i); - } - else if(allowed_subtypes.count(subtype)) - { - l2inds[label] = {i}; - l2s[label] = subtype; - } + std::string subtype = data.at(i).at(subtype_cind).get(); + std::string label = utils::replace(subtype, "-", "_"); + + if(l2inds.count(label)) + { + l2inds.at(label).push_back(i); + } + else if(allowed_subtypes.count(subtype)) + { + l2inds[label] = {i}; + l2s[label] = subtype; + } } index_type delta=128; for(auto itr=l2inds.begin(); itr!=l2inds.end(); itr++) { - auto label = itr->first; - auto& inds = itr->second; - - //LOG_S(INFO) << "init geoloc subtype " << l2s.at(label) << ": " << l2inds.at(label).size(); - - index_type len = inds.size(); - for(index_type i0=0; i0"; - for(index_type i1=lb; i1(); - - ss << cexpr; - - if(i1+1get_key(), l2s.at(label), ss.str()); - exprs.push_back(expr); - } + auto label = itr->first; + auto& inds = itr->second; + + //LOG_S(INFO) << "init geoloc subtype " << l2s.at(label) << ": " << l2inds.at(label).size(); + + index_type len = inds.size(); + for(index_type i0=0; i0"; + for(index_type i1=lb; i1(); + + ss << cexpr; + + if(i1+1get_key(), l2s.at(label), ss.str()); + exprs.push_back(expr); + } } return (exprs.size()>0); @@ -161,66 +161,111 @@ namespace andromeda LOG_S(ERROR) << __FUNCTION__ << " on text not implemented ..."; return false; } - + bool nlp_model::apply(subject& subj) { //LOG_S(ERROR) << __FUNCTION__ << " on paragraph ..."; - + std::string text = subj.get_text(); for(auto& expr:exprs) { - std::vector items; - expr.find_all(text, items); - - for(auto& item:items) - { - for(auto& grp:item.groups) - { - if(l2inds.count(grp.group_name)==1) - { - // NOTE: in future, we might need to have individual post-processing - // to determine the range. - auto char_range = grp.rng; - - auto ctok_range = subj.get_char_token_range(char_range); - auto wtok_range = subj.get_word_token_range(char_range); - - std::string orig = subj.from_char_range(char_range); - std::string name = subj.from_ctok_range(ctok_range); - - subj.instances.emplace_back(subj.get_hash(), - GEOLOC, expr.get_subtype(), - name, orig, - char_range, ctok_range, wtok_range); - - //utils::mask(text, item.rng); - } - } - } + std::vector items; + expr.find_all(text, items); + + for(auto& item:items) + { + for(auto& grp:item.groups) + { + if(l2inds.count(grp.group_name)==1) + { + // NOTE: in future, we might need to have individual post-processing + // to determine the range. + auto char_range = grp.rng; + + auto ctok_range = subj.get_char_token_range(char_range); + auto wtok_range = subj.get_word_token_range(char_range); + + std::string orig = subj.from_char_range(char_range); + std::string name = subj.from_ctok_range(ctok_range); + + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + GEOLOC, expr.get_subtype(), + name, orig, + char_range, ctok_range, wtok_range); + + //utils::mask(text, item.rng); + } + } + } } for(auto itr=subj.instances.begin(); itr!=subj.instances.end(); ) { - if(not itr->is_wtok_range_match()) - { - itr = subj.instances.erase(itr); - } - else - { - itr++; - } + if(not itr->is_wtok_range_match()) + { + itr = subj.instances.erase(itr); + } + else + { + itr++; + } } - + return update_applied_models(subj); } - - bool nlp_model::apply(subject
& subj) + + bool nlp_model::apply_on_table_data(subject
& subj) { - return false; - //return rgx_model.apply(subj); + for(std::size_t i=0; i items; + expr.find_all(text, items); + + for(auto& item:items) + { + for(auto& grp:item.groups) + { + if(l2inds.count(grp.group_name)==1) + { + // NOTE: in future, we might need to have individual post-processing + // to determine the range. + auto char_range = grp.rng; + + auto ctok_range = subj(i,j).get_char_token_range(char_range); + auto wtok_range = subj(i,j).get_word_token_range(char_range); + + std::string orig = subj(i,j).from_char_range(char_range); + std::string name = subj(i,j).from_ctok_range(ctok_range); + + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + GEOLOC, expr.get_subtype(), + name, orig, + subj(i,j).get_coor(), + subj(i,j).get_row_span(), + subj(i,j).get_col_span(), + char_range, ctok_range, wtok_range); + } + } + } + } + } + } + + return true; } - + } #endif diff --git a/src/andromeda/nlp/ent/link.h b/src/andromeda/nlp/ent/link.h index 6a7ce0a6..00f4d97d 100644 --- a/src/andromeda/nlp/ent/link.h +++ b/src/andromeda/nlp/ent/link.h @@ -79,6 +79,12 @@ namespace andromeda exprs.push_back(expr); } + // Arxiv: "arXiv:2201.08390v1 [gr-qc] 20 Jan 2022" + { + pcre2_expr expr(this->get_key(), "arxiv", R"((?P(arXiv:(\d+).(\d+)(v\d*)? \[.+\] (\d+) [A-Za-z]+ \d+)))"); + exprs.push_back(expr); + } + return true; } @@ -98,7 +104,7 @@ namespace andromeda bool nlp_model::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { @@ -132,7 +138,7 @@ namespace andromeda // remove spaces name = utils::replace(name, " ", ""); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), LINK, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/name.h b/src/andromeda/nlp/ent/name.h index 08f7a7b1..26572c25 100644 --- a/src/andromeda/nlp/ent/name.h +++ b/src/andromeda/nlp/ent/name.h @@ -136,7 +136,7 @@ namespace andromeda bool nlp_model::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { std::vector items; @@ -189,7 +189,7 @@ namespace andromeda if(keep) { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), NAME, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/numval.h b/src/andromeda/nlp/ent/numval.h index 270a0790..7230b072 100644 --- a/src/andromeda/nlp/ent/numval.h +++ b/src/andromeda/nlp/ent/numval.h @@ -21,7 +21,8 @@ namespace andromeda virtual model_name get_name() { return NUMVAL; } virtual bool apply(subject& subj); - virtual bool apply(subject
& subj); + + virtual bool apply_on_table_data(subject
& subj); private: @@ -139,8 +140,9 @@ namespace andromeda apply_regex(subj); //subj.show(); - - subj.contract_wtokens_from_instances(NUMVAL); + + // FIXME + //subj.contract_wtokens_from_instances(NUMVAL); //subj.show(); @@ -149,7 +151,7 @@ namespace andromeda bool nlp_model::apply_regex(subject& subj) { - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& expr:exprs) { std::vector items; @@ -173,7 +175,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), TEXT, subj.get_self_ref(), NUMVAL, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); @@ -189,23 +191,16 @@ namespace andromeda return update_applied_models(subj); } - bool nlp_model::apply(subject
& subj) + bool nlp_model::apply_on_table_data(subject
& subj) { - //LOG_S(INFO) << "starting numval ..."; - - if(not satisfies_dependencies(subj)) - { - return false; - } - //subj.show(); for(std::size_t i=0; i& subj); - virtual bool apply(subject
& subj); + virtual bool apply_on_table_data(subject
& subj); private: @@ -96,7 +96,7 @@ namespace andromeda return false; } - std::string text = subj.text; + std::string text = subj.get_text(); bool updating=true; @@ -119,7 +119,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), PARENTHESIS, expr.get_subtype(), name, orig, char_range, @@ -135,7 +135,7 @@ namespace andromeda return update_applied_models(subj); } - bool nlp_model::apply(subject
& subj) + bool nlp_model::apply_on_table_data(subject
& subj) { if(not satisfies_dependencies(subj)) { @@ -146,7 +146,7 @@ namespace andromeda { for(std::size_t j=0; j& subj, - std::vector& exprs, - std::vector& chunks) - { - chunks.clear(); - - std::stringstream ss; - for(std::size_t l=0; l& exprs, std::vector& chunks) { chunks.clear(); + auto& word_tokens = subj.get_word_tokens(); + std::stringstream ss; - for(std::size_t l=0; l& ranges_02, std::vector& chunks) { + auto& word_tokens = subj.get_word_tokens(); + for(pcre2_item& chunk:chunks) { std::vector token_inds = get_indices(chunk.text); @@ -198,7 +176,7 @@ namespace andromeda for(std::size_t l=0; l1) { - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), name, subtype, text, orig, char_range, ctok_range, wtok_range); @@ -240,6 +218,8 @@ namespace andromeda std::vector& ranges_02, std::vector& chunks) { + + for(pcre2_item& chunk:chunks) { std::vector token_inds = get_indices(chunk.text); @@ -249,12 +229,13 @@ namespace andromeda std::size_t ci=0,cj=0; auto& elem = subj(coor); - + auto& word_tokens = elem.get_word_tokens(); + std::vector > words; for(std::size_t l=0; l1) { - subj.instances.emplace_back(subj.get_hash(), - name, subtype, - text, orig, + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + name, subtype, + text, orig, coor, row_span, col_span, - char_range, ctok_range, wtok_range); + char_range, ctok_range, wtok_range); } } } diff --git a/src/andromeda/nlp/ent/pos_pattern/conn.h b/src/andromeda/nlp/ent/pos_pattern/conn.h index 97642bc2..646fb85c 100644 --- a/src/andromeda/nlp/ent/pos_pattern/conn.h +++ b/src/andromeda/nlp/ent/pos_pattern/conn.h @@ -22,10 +22,8 @@ namespace andromeda virtual model_name get_name() { return CONN; } virtual bool apply(subject& subj); - virtual bool apply(subject
& subj) { return false; } + virtual bool apply_on_table_data(subject
& subj) { return false; } - //virtual bool apply(subject& subj) { return false; } - private: std::vector exprs; diff --git a/src/andromeda/nlp/ent/pos_pattern/term.h b/src/andromeda/nlp/ent/pos_pattern/term.h index 47a8e495..fc2cfc60 100644 --- a/src/andromeda/nlp/ent/pos_pattern/term.h +++ b/src/andromeda/nlp/ent/pos_pattern/term.h @@ -22,7 +22,8 @@ namespace andromeda virtual model_name get_name() { return TERM; } virtual bool apply(subject& subj); - virtual bool apply(subject
& subj); + + virtual bool apply_on_table_data(subject
& subj); private: @@ -124,6 +125,7 @@ namespace andromeda { if(not satisfies_dependencies(subj, text_dependencies)) { + //LOG_S(WARNING) << "skipping term ..."; return false; } @@ -146,12 +148,12 @@ namespace andromeda return update_applied_models(subj); } - bool nlp_model::apply(subject
& subj) + bool nlp_model::apply_on_table_data(subject
& subj) { - if(not satisfies_dependencies(subj, table_dependencies)) - { - return false; - } + //if(not satisfies_dependencies(subj, table_dependencies)) + //{ + //return false; + //} for(std::size_t i=0; i& subj); - virtual bool apply(subject
& subj) { return false; } - - //virtual bool apply(subject& subj) { return false; } + virtual bool apply_on_table_data(subject
& subj) { return false; } private: diff --git a/src/andromeda/nlp/ent/quote.h b/src/andromeda/nlp/ent/quote.h index 15b44ff4..9b070f08 100644 --- a/src/andromeda/nlp/ent/quote.h +++ b/src/andromeda/nlp/ent/quote.h @@ -84,12 +84,12 @@ namespace andromeda return false; } - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& inst:subj.instances) { - if(dependencies.count(inst.model_type)==1) + if(dependencies.count(inst.get_model())==1) { - utils::mask(text, inst.char_range); + utils::mask(text, inst.get_char_range()); } } @@ -112,7 +112,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), QUOTE, expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/nlp/ent/reference.h b/src/andromeda/nlp/ent/reference.h index ae4fdae9..d4e53cf6 100644 --- a/src/andromeda/nlp/ent/reference.h +++ b/src/andromeda/nlp/ent/reference.h @@ -10,9 +10,18 @@ namespace andromeda class nlp_model: public base_crf_model { typedef typename word_token::range_type range_type; - + const static inline std::string TAG = "__"+to_string(REFERENCE)+"__"; + const static inline std::set LABELS = { "reference-number", + "authors", "title", + "publisher", + "journal", "conference", + "date", + "volume", "pages", + "url", "doi", "isbn", + "note"}; + public: nlp_model(); @@ -21,23 +30,26 @@ namespace andromeda ~nlp_model(); virtual std::set get_dependencies() { return dependencies; } - + virtual model_type get_type() { return ENT; } virtual model_name get_name() { return REFERENCE; } virtual bool apply(subject& subj); virtual bool apply(subject
& subj) { return false; } virtual bool apply(subject& subj); - + private: //void initialise(std::filesystem::path resources_dir); bool initialise(); void run_model(subject& subj); - + void post_process(subject& subj); - + + std::string normalise_name(std::string orig); + void normalise_subject(subject& subj); + private: const static std::set dependencies; @@ -45,7 +57,7 @@ namespace andromeda std::filesystem::path model_file; }; - const std::set nlp_model::dependencies = { SEMANTIC, LINK, NUMVAL}; + const std::set nlp_model::dependencies = { LINK, NUMVAL, SEMANTIC }; nlp_model::nlp_model(): model_file(get_crf_dir() / "reference/crf_reference.bin") @@ -53,14 +65,6 @@ namespace andromeda initialise(); } - /* - nlp_model::nlp_model(std::filesystem::path resources_dir) - { - //initialise(resources_dir); - initialise(); - } - */ - nlp_model::~nlp_model() {} @@ -68,230 +72,281 @@ namespace andromeda { if(not base_crf_model::load(model_file, false)) { - LOG_S(ERROR) << "could not load REFERENCE model from " << model_file; - return false; + LOG_S(ERROR) << "could not load REFERENCE model from " << model_file; + return false; } return true; } - - /* - void nlp_model::initialise(std::filesystem::path resources_dir) + + bool nlp_model::apply(subject& doc) { - if(not base_crf_model::load(resources_dir / "models/crf/reference/reference-latest.bin", false)) + if(not satisfies_dependencies(doc)) { - LOG_S(FATAL) << "could not load REFERENCE model from " << resources_dir; + return false; } - } - */ - - bool nlp_model::apply(subject& doc) - { + + //LOG_S(INFO) << "#-texts: " << doc.texts.size(); for(auto& paragraph:doc.texts) { - this->apply(*paragraph); + this->apply(*paragraph); } return true; } - + bool nlp_model::apply(subject& subj) { - //LOG_S(WARNING) << "reference parsing started ..."; - + //LOG_S(INFO) << __FILE__ << ":" << __LINE__ << "\t" << subj.get_text(); + if(not satisfies_dependencies(subj)) { - return false; + //LOG_S(WARNING) << "does not satisfy deps ... "; + return false; } bool is_ref=false; for(auto& cls:subj.properties) { - //LOG_S(INFO) << cls.type << " -> " << (cls.type==to_key(SEMANTIC)); - //LOG_S(INFO) << cls.name << " -> " << (cls.name=="reference"); - - if(cls.get_type()==to_key(SEMANTIC) and - cls.get_name()=="reference") - { - is_ref = true; - } + if((cls.get_type()==to_key(SEMANTIC)) and (cls.is_label("reference"))) + { + is_ref = true; + //LOG_S(WARNING) << " => " << cls.get_type() << "\t" << cls.get_label(); + } + else + { + //LOG_S(INFO) << " => " << cls.get_type() << "\t" << cls.get_label(); + } } - + // text in subject is not a reference and we do not apply the reference parser - if(not is_ref) + if(not is_ref) { - //LOG_S(WARNING) << "is not a reference ..."; - return true; + return true; } - + run_model(subj); post_process(subj); - + return true; } void nlp_model::run_model(subject& subj) { - //LOG_S(WARNING) << __FILE__ << ":" << __LINE__; - + //LOG_S(WARNING) << __FILE__ << ":" << __LINE__ << "\t" << __FUNCTION__; + std::vector crf_tokens={}; std::map ptid_to_wtid={}; - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); //auto& entities = subj.entities; - + //pre_process(wtokens, ent.wtok_range, pos_tokens, ptid_to_wtid); for(std::size_t l=0; l texts={".",",","and"}; - for(std::size_t l=1; l texts={".",",","and"}; + for(std::size_t l=1; l::post_process(subject& subj) { - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); //std::map > > labels_to_crng={}; std::map > labels_to_crng={}; - + for(std::size_t l=0; lfirst << ": " << (itr->second).size(); - for(auto jtr=(itr->second).begin(); jtr!=(itr->second).end(); jtr++) - { - LOG_S(INFO) << " -> " << (*jtr)[0] << ", " << (*jtr)[1]; - } + LOG_S(INFO) << itr->first << ": " << (itr->second).size(); + for(auto jtr=(itr->second).begin(); jtr!=(itr->second).end(); jtr++) + { + LOG_S(INFO) << " -> " << (*jtr)[0] << ", " << (*jtr)[1]; + } } */ - - std::set labels + + /* + std::set labels = { "citation-number", - "author", "title", - "publisher", "editor", - "journal", "container-title", - "location", "date", - "volume", "pages", - "url", "doi"}; - - for(const auto& label:labels) - { - if(labels_to_crng.count(label)==0) - { - continue; - } - - auto& ranges = labels_to_crng.at(label); - - std::size_t ind=0; - while(indmodel_type!=REFERENCE) - { - itr = subj.instances.erase(itr); - } - else - { - itr++; - } - } - } + normalise_subject(subj); + } + + std::string nlp_model::normalise_name(std::string orig) + { + const static std::vector endings + = {")", "]", ".", ",", " "}; + const static std::vector startings + = {"(", "[", + "doi:", "DOI:", "isbn:", "ISBN:", + "arXiv preprint", + " "}; + std::string name = orig; + + bool updating=true; + while(updating) + { + updating=false; + for(auto end:endings) + { + if(name.ends_with(end)) + { + name = name.substr(0, name.size()-end.size()); + updating=true; + } + } + } + + updating = true; + while(updating) + { + updating=false; + for(auto strt:startings) + { + if(name.starts_with(strt)) + { + name = name.substr(strt.size(), name.size()-strt.size()); + updating=true; + } + } + } + + return name; } - + + void nlp_model::normalise_subject(subject& subj) + { + auto itr=subj.instances.begin(); + while(itr!=subj.instances.end()) + { + if(not (itr->is_model(REFERENCE))) + { + itr = subj.instances.erase(itr); + } + else + { + itr++; + } + } + } + } #endif diff --git a/src/andromeda/nlp/ent/sentence.h b/src/andromeda/nlp/ent/sentence.h index 3d721ab0..9624e73b 100644 --- a/src/andromeda/nlp/ent/sentence.h +++ b/src/andromeda/nlp/ent/sentence.h @@ -22,9 +22,6 @@ namespace andromeda virtual bool apply(subject& subj); virtual bool apply(subject
& subj) { return false; } - - //virtual bool apply(subject& subj) { return false; } - //virtual bool apply(subject& subj); private: @@ -63,19 +60,19 @@ namespace andromeda return false; } - std::string text = subj.text; + std::string text = subj.get_text(); for(auto& ent:subj.instances) { - if(dependencies.count(ent.model_type)==1) + if(dependencies.count(ent.get_model())==1) { - if(ent.model_type==NAME or - ent.model_type==EXPRESSION or - ent.model_type==QUOTE) + if(ent.is_model(NAME) or + ent.is_model(EXPRESSION) or + ent.is_model(QUOTE)) { - for(std::size_t i=ent.char_range[0]; i sent_ranges={}; for(auto& expr:exprs) { std::vector items; @@ -108,11 +106,110 @@ namespace andromeda std::string sent = orig.substr(char_range[0], char_range[1]-char_range[0]); - subj.instances.emplace_back(subj.get_hash(), - SENTENCE, "", + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + SENTENCE, "proper", sent, sent, char_range, ctok_range, wtok_range); + + sent_ranges.push_back(char_range); + } + } + + std::vector ranges={}; + for(auto& rng:sent_ranges) + { + if(ranges.size()==0 and rng.at(0)==0) + { + ranges.push_back(rng); + } + else if(ranges.size()==0 and rng.at(0)>0) + { + ranges.push_back({0, rng.at(0)}); + ranges.push_back(rng); + } + else if(ranges.back().at(1)==rng.at(0)) + { + ranges.push_back(rng); + } + else if(ranges.back().at(1)0 and ranges.back().at(1)0) + { + ranges.push_back({0, text.size()}); + } + + for(auto itr=ranges.begin(); itr!=ranges.end(); ) + { + bool updated=false; + for(auto sent_rng:sent_ranges) + { + if(*itr==sent_rng) + { + itr = ranges.erase(itr); + updated=true; + } + } + + if(not updated) + { + itr++; + } + } + + //LOG_S(WARNING) << "text: " << text; + //LOG_S(WARNING) << "text-size: " << text.size() << "; subj.len: " << subj.get_len(); + + for(auto rng:ranges) + { + range_type char_range = rng; + + //LOG_S(INFO) << "char (1): " << char_range.at(0) << "-" << char_range.at(1); + + while(char_range.at(0) char: " << char_range.at(0) << "-" << char_range.at(1); + } + + //LOG_S(INFO) << "char (2): " << char_range.at(0) << "-" << char_range.at(1); + + while(char_range.at(0) char: " << char_range.at(0) << "-" << char_range.at(1); + } + + //LOG_S(INFO) << "char (3): " << char_range.at(0) << "-" << char_range.at(1); + + if(char_range.at(0)==char_range.at(1)) + { + continue; } + + range_type ctok_range = subj.get_char_token_range(char_range); + range_type wtok_range = subj.get_word_token_range(char_range); + + std::string sent = orig.substr(char_range[0], char_range[1]-char_range[0]); + + //LOG_S(WARNING) << " => sent: " << sent; + + subj.instances.emplace_back(subj.get_hash(), subj.get_name(), subj.get_self_ref(), + SENTENCE, "improper", + sent, sent, + char_range, ctok_range, wtok_range); } return update_applied_models(subj); diff --git a/src/andromeda/nlp/pos/lapos.h b/src/andromeda/nlp/pos/lapos.h index b23d12f1..ab0e93d5 100644 --- a/src/andromeda/nlp/pos/lapos.h +++ b/src/andromeda/nlp/pos/lapos.h @@ -39,8 +39,8 @@ namespace andromeda bool contract_word_tokens(subject& subj); - void pre_process(std::vector& wtokens, - range_type& rng, + void pre_process(const std::vector& wtokens, + const range_type rng, std::vector& pos_tokens, std::map& ptid_to_wtid); @@ -126,7 +126,9 @@ namespace andromeda template bool nlp_model::check_dependency(const std::set& deps, subject_type& subj, std::string& lang) - { + { + //LOG_S(INFO) << __FUNCTION__; + bool static_dependency = satisfies_dependencies(subj, deps); bool dyn_dependency=false; @@ -134,9 +136,9 @@ namespace andromeda for(auto& prop:subj.properties) { if(prop.get_type()==to_key(LANGUAGE) and - pos_models.count(prop.get_name())==1) + pos_models.count(prop.get_label())==1) { - lang = prop.get_name(); + lang = prop.get_label(); dyn_dependency=true; } } @@ -147,14 +149,17 @@ namespace andromeda bool nlp_model::apply(subject& subj) { // initialise - for(auto& token:subj.word_tokens) - { - token.set_pos(word_token::UNDEF_POS); - } + //for(auto& token:subj.get_word_tokens()) + //{ + //token.set_pos(word_token::UNDEF_POS); + //} + subj.init_pos(); + std::string lang="null"; if(not check_dependency(text_dependencies, subj, lang)) { + //LOG_S(WARNING) << "skipping POS ..."; return false; } @@ -171,23 +176,85 @@ namespace andromeda std::vector pos_tokens={}; std::map ptid_to_wtid={}; - auto& wtokens = subj.word_tokens; + auto& wtokens = subj.get_word_tokens(); auto& instances = subj.instances; + /* // iterate over the sentences ... for(auto& inst:instances) { - if(inst.model_type!=SENTENCE) + //LOG_S(INFO) << "inst: " << to_key(inst.get_model()) + //<< "\t" << SENTENCE << "\t" << inst.get_model() + //<< "\t" << inst.is_model(SENTENCE); + + if(not inst.is_model(SENTENCE)) { + //LOG_S(WARNING) << " --> skipping inst ..."; continue; } - pre_process(wtokens, inst.wtok_range, pos_tokens, ptid_to_wtid); + pre_process(wtokens, inst.get_wtok_range(), pos_tokens, ptid_to_wtid); pos_model->predict(pos_tokens); post_process(wtokens, pos_tokens, ptid_to_wtid); } + */ + + std::vector sent_ranges={}; + for(auto& inst:instances) + { + if(inst.is_model(SENTENCE)) + { + sent_ranges.push_back(inst.get_wtok_range()); + + //LOG_S(INFO) << "sentence (" << inst.get_subtype() << ") : " + //<< sent_ranges.back().at(0) << ", " + //<< sent_ranges.back().at(1); + } + } + + /* + std::vector ranges={}; + for(auto& rng:sent_ranges) + { + if(ranges.size()==0 and rng.at(0)==0) + { + ranges.push_back(rng); + } + else if(ranges.size()==0 and rng.at(0)>0) + { + ranges.push_back({0, rng.at(0)}); + ranges.push_back(rng); + } + else if(ranges.back().at(1)==rng.at(0)) + { + ranges.push_back(rng); + } + else if(ranges.back().at(1)0 and ranges.back().at(1)predict(pos_tokens); + + post_process(wtokens, pos_tokens, ptid_to_wtid); + } } bool nlp_model::apply(subject
& subj) @@ -204,7 +271,7 @@ namespace andromeda { for(std::size_t j=0; j::pre_process(std::vector& wtokens, - range_type& rng, + void nlp_model::pre_process(const std::vector& wtokens, + const range_type rng, std::vector& pos_tokens, std::map& ptid_to_wtid) { diff --git a/src/andromeda/nlp/rel/abbreviation.h b/src/andromeda/nlp/rel/abbreviation.h index 5a082843..5dfca255 100644 --- a/src/andromeda/nlp/rel/abbreviation.h +++ b/src/andromeda/nlp/rel/abbreviation.h @@ -75,28 +75,31 @@ namespace andromeda void nlp_model::find_abbreviation_instances(subject& subj) { - std::string& text = subj.text; + std::string text = subj.get_text(); //std::size_t max_id = subj.get_max_ent_hash(); for(auto& ent_j:subj.instances) { - auto& crng = ent_j.char_range; + auto crng = ent_j.get_char_range(); - auto& ctok_rng = ent_j.ctok_range; - auto& wtok_rng = ent_j.wtok_range; + auto ctok_rng = ent_j.get_ctok_range(); + auto wtok_rng = ent_j.get_wtok_range(); - if(ent_j.model_type==TERM and + auto name = ent_j.get_name(); + auto orig = ent_j.get_orig(); + + if(ent_j.is_model(TERM) and 0get_name()); } /* INFERENCE */ + + template + bool is_applied(subject_type& subj); template bool satisfies_dependencies(subject_type& subj); @@ -39,7 +42,13 @@ namespace andromeda virtual bool apply(std::string& text, nlohmann::json& annots) { return false; } virtual bool apply(subject& subj) = 0;// { return false; } - virtual bool apply(subject
& subj) = 0;//{ return false; } + //virtual bool apply(subject
& subj) = 0;//{ return false; } + + virtual bool apply(subject
& subj); //{ return false; } + virtual bool apply_on_table_data(subject
& subj) { return false; } + + virtual bool apply(subject
& subj); + virtual bool apply_on_figure_data(subject
& subj) { return false; } virtual bool apply(subject& subj); @@ -60,12 +69,29 @@ namespace andromeda std::vector >& dep_models) { return false; } }; + template + bool base_nlp_model::is_applied(subject_type& subj) + { + return (subj.applied_models.count(this->get_key())==1); + } + template bool base_nlp_model::satisfies_dependencies(subject_type& subj) { + //if(subj.applied_models.count(this->get_key())) + //{ + //LOG_S(WARNING) << "already applied " << this->get_key() << " ..."; + //return false; // already done ... + //} + + if(is_applied(subj)) // already done ... + { + return false; + } + return satisfies_dependencies(subj, get_dependencies()); } - + template bool base_nlp_model::satisfies_dependencies(subject_type& subj, const std::set& deps) { @@ -88,25 +114,62 @@ namespace andromeda return true; } - bool base_nlp_model::apply(subject& subj) + bool base_nlp_model::apply(subject
& subj) + { + if(not satisfies_dependencies(subj)) + { + return false; + } + + for(auto& caption:subj.captions) + { + this->apply(*caption); + } + + this->apply_on_table_data(subj); + + return true; + } + + bool base_nlp_model::apply(subject
& subj) { if(not satisfies_dependencies(subj)) { return false; } - //LOG_S(INFO) << "apply " << get_key() << " on document: " << subj.doc_name; + for(auto& caption:subj.captions) + { + this->apply(*caption); + } + this->apply_on_figure_data(subj); + + return true; + } + + bool base_nlp_model::apply(subject& subj) + { + if(not satisfies_dependencies(subj)) + { + return false; + } + for(auto& text_ptr:subj.texts) { this->apply(*text_ptr); } - + for(auto& table_ptr:subj.tables) { this->apply(*table_ptr); } + for(auto& figure_ptr:subj.figures) + { + this->apply(*figure_ptr); + } + return update_applied_models(subj); } diff --git a/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h b/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h index 1582a23a..9e827b5c 100644 --- a/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h +++ b/src/andromeda/tooling/models/base_crf_model/algorithms/crf_train.h @@ -262,11 +262,12 @@ namespace andromeda_crf { nlohmann::json sample = nlohmann::json::parse(line); - assert(sample.count("word-tokens")>0); + //assert(sample.count("word-tokens")>0); + //assert(sample.count(text_element::word_tokens_lbl)==1); assert(sample.count("training-sample")>0); //LOG_S(INFO) << sample.dump(2); - auto& wtokens = sample["word-tokens"]; + auto& wtokens = sample.at(andromeda::text_element::word_tokens_lbl); std::vector headers = {}; headers = wtokens.value("headers", headers); diff --git a/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp b/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp index 28c37789..c08e0ee9 100644 --- a/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp +++ b/src/andromeda/tooling/models/base_crf_model/structures/crf_model.cpp @@ -875,7 +875,7 @@ namespace andromeda_crf if (seq.vs.size() >= MAX_LEN) { LOG_S(ERROR) << "error: sequence is too long."; return; - //exit(1); + } if (seq.vs.size() == 0) { @@ -905,14 +905,22 @@ namespace andromeda_crf assert(s.label >= 0 && s.label < MAX_LABEL_TYPES); - for (std::vector::const_iterator j = i->features.begin(); j != i->features.end(); j++) { - if (contain_space(*j)) { - LOG_S(ERROR) << "error: the name of a feature must not contain any space."; - exit(1); - } - s.positive_features.push_back(_featurename_bag.Put(*j)); - } + for (std::vector::const_iterator j = i->features.begin(); j != i->features.end(); j++) + { + if(contain_space(*j)) + { + LOG_S(ERROR) << "error: the name of a feature (" << (*j) << ") must not contain any space."; + + std::string feat = *j; + feat = andromeda::utils::replace(feat, " ", "_"); + s.positive_features.push_back(_featurename_bag.Put(feat)); + } + else + { + s.positive_features.push_back(_featurename_bag.Put(*j)); + } + } s1.vs.push_back(s); } diff --git a/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h b/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h index 686009da..0aae4012 100644 --- a/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h +++ b/src/andromeda/tooling/models/base_fst_model/fasttext_supervised_model.h @@ -352,15 +352,17 @@ namespace andromeda bool fasttext_supervised_model::preprocess(const subject& subj, std::string& text) { - auto& wtokens = subj.word_tokens; + //auto& wtokens = subj.word_tokens; //LOG_S(INFO) << "tokens: \n\n" << tabulate(wtokens); std::stringstream ss; - - std::size_t MAXLEN = 256; - for(std::size_t l=0; l0) @@ -790,10 +792,10 @@ namespace andromeda if(preprocess(subj, text) and classify(text, label, conf)) { - std::string key = get_key(); + //std::string key = get_key(); - subj.properties.emplace_back(key, label, conf); - subj.applied_models.insert(key); + subj.properties.emplace_back(subj.get_hash(), TEXT, "#", get_name(), label, conf); + subj.applied_models.insert(get_key()); } return update_applied_models(subj); @@ -813,13 +815,10 @@ namespace andromeda if(preprocess(subj, text) and classify(text, label, conf)) { - std::string key = get_key(); - - subj.properties.emplace_back(key, label, conf); - subj.applied_models.insert(key); + //std::string key = get_key(); - //LOG_S(INFO) << "text: " << text; - //LOG_S(INFO) << key << " (" << label << "): " << conf; + subj.properties.emplace_back(subj.get_hash(), TABLE, "#", get_name(), label, conf); + subj.applied_models.insert(get_key()); } return update_applied_models(subj); diff --git a/src/andromeda/tooling/models/base_rgx_model.h b/src/andromeda/tooling/models/base_rgx_model.h index 9213c19e..64547704 100644 --- a/src/andromeda/tooling/models/base_rgx_model.h +++ b/src/andromeda/tooling/models/base_rgx_model.h @@ -140,7 +140,7 @@ namespace andromeda std::string orig = subj.from_char_range(char_range); std::string name = subj.from_ctok_range(ctok_range); - subj.instances.emplace_back(subj.get_hash(), + subj.instances.emplace_back(subj.get_hash(), TEXT, subj.get_self_ref(), this->get_name(), expr.get_subtype(), name, orig, char_range, ctok_range, wtok_range); diff --git a/src/andromeda/tooling/producers/impl/document.h b/src/andromeda/tooling/producers/impl/document.h index 88bae0af..12ba85d3 100644 --- a/src/andromeda/tooling/producers/impl/document.h +++ b/src/andromeda/tooling/producers/impl/document.h @@ -255,7 +255,7 @@ namespace andromeda bool producer::write(doc_type& subj) { - std::filesystem::path filepath = subj.filepath; + std::filesystem::path filepath = subj.get_filepath(); std::filesystem::path filename = filepath.filename(); //std::filesystem::path filedir = filepath.dirname(); diff --git a/src/andromeda/tooling/structs/elements/prov_element.h b/src/andromeda/tooling/structs/elements/prov_element.h index ea75274e..d753fa5c 100644 --- a/src/andromeda/tooling/structs/elements/prov_element.h +++ b/src/andromeda/tooling/structs/elements/prov_element.h @@ -46,9 +46,6 @@ namespace andromeda ind_type get_maintext_ind() { return maintext_ind; } ind_type get_pdforder_ind() { return pdforder_ind; } - //std::string get_path() { return path; } - //void set_path(std::string val) { path = val; } - std::string get_item_ref() { return item_ref; } void set_item_ref(std::string val) { item_ref = val; } diff --git a/src/andromeda/tooling/structs/elements/table_element.h b/src/andromeda/tooling/structs/elements/table_element.h index 89791534..d9b8786f 100644 --- a/src/andromeda/tooling/structs/elements/table_element.h +++ b/src/andromeda/tooling/structs/elements/table_element.h @@ -58,30 +58,9 @@ namespace andromeda table_element::table_element(nlohmann::json& json_cell) { - from_json(json_cell); - - text_element::set(orig, NULL, NULL); + from_json(json_cell); } - /* - table_element::table_element(uint64_t i, - uint64_t j, - std::string orig): - text_element(), - i(i), j(j), - - row_span({i,i+1}), - col_span({j,j+1}), - - row_header(false), - col_header(false), - - numeric(false) - { - text_element::set(orig, NULL, NULL); - } - */ - table_element::table_element(uint64_t i, uint64_t j, std::array row_span, std::array col_span, @@ -100,7 +79,8 @@ namespace andromeda numeric(false) { - text_element::set(orig, NULL, NULL); + //text_element::set(orig, NULL, NULL); + text_element::set_text(orig); } nlohmann::json table_element::to_json() @@ -149,12 +129,22 @@ namespace andromeda i = json_cell.at("row").get(); j = json_cell.at("col").get(); - row_span = json_cell.at("row-span").get>(); - col_span = json_cell.at("col-span").get>(); + row_span = json_cell.at("row-span").get >(); + col_span = json_cell.at("col-span").get >(); + + std::string ctext = json_cell.at("text").get(); + text_element::set_text(ctext); - text = json_cell.at("text").get(); type = json_cell.at("type").get(); - bbox = json_cell.at("bbox").get >(); + + if(json_cell.at("bbox").is_array()) + { + bbox = json_cell.at("bbox").get >(); + } + else + { + bbox = {0.0, 0.0, 0.0, 0.0}; + } row_header = json_cell.at("row-header").get(); col_header = json_cell.at("col-header").get(); diff --git a/src/andromeda/tooling/structs/elements/text_element.h b/src/andromeda/tooling/structs/elements/text_element.h index f4d8f748..488db898 100644 --- a/src/andromeda/tooling/structs/elements/text_element.h +++ b/src/andromeda/tooling/structs/elements/text_element.h @@ -11,8 +11,13 @@ namespace andromeda typedef std::tuple candidate_type; - const static inline std::string char_tokens_lbl = "char-tokens"; - const static inline std::string word_tokens_lbl = "word-tokens"; + const static inline std::string text_lbl = "text"; + const static inline std::string orig_lbl = "orig"; + + const static inline std::string text_hash_lbl = "text_hash"; + + const static inline std::string char_tokens_lbl = "char_tokens"; + const static inline std::string word_tokens_lbl = "word_tokens"; public: @@ -24,10 +29,26 @@ namespace andromeda text_element(); bool is_valid(); + + std::size_t get_len() const { return len; } // number-of-chars + std::size_t get_dst() const { return dst; } // number-of-utf8-tokens + + bool is_text_valid() { return text_valid; } void clear(); hash_type get_text_hash() const { return text_hash; } + + std::size_t get_num_wtokens() const { return word_tokens.size(); } + const word_token& get_wtoken(std::size_t i) const { return word_tokens.at(i); } + + std::vector& get_word_tokens() { return word_tokens; } + + void init_pos() { for(auto& wtoken:word_tokens) { wtoken.set_pos(word_token::UNDEF_POS); } } + + void set_pos(std::size_t i, std::string pos) { word_tokens.at(i).set_pos(pos); } + void set_tag(std::size_t i, std::string tag) { word_tokens.at(i).set_tag(tag); } + void set_word(std::size_t i, std::string wrd) { word_tokens.at(i).set_word(wrd); } bool set_text(const std::string& ctext); @@ -69,18 +90,23 @@ namespace andromeda void contract_char_tokens(); void contract_word_tokens(); - public: + //public: + private: + bool text_valid; - uint64_t text_hash; // hash of normalised text - + std::size_t len; // number-of-chars std::size_t dst; // number-of-utf8-tokens + protected: + std::string orig; // original text std::string text; // normalised text (removing confusables) + protected: + std::vector char_tokens; std::vector word_tokens; }; @@ -113,15 +139,15 @@ namespace andromeda { nlohmann::json elem = nlohmann::json::object({}); - elem["text"] = text; - elem["orig"] = orig; + elem[text_lbl] = text; + elem[orig_lbl] = orig; - elem["text-hash"] = text_hash; + elem[text_hash_lbl] = text_hash; // in the default setting, word-tokens will not be dumped - if(filters.count("word-tokens")) + if(filters.count(word_tokens_lbl)) { - elem["word-tokens"] = andromeda::to_json(word_tokens, text); + elem[word_tokens_lbl] = andromeda::to_json(word_tokens, text); } return elem; @@ -133,14 +159,14 @@ namespace andromeda this->clear(); - if(elem.count("orig")) + if(elem.count(orig_lbl)) { - auto ctext = elem.at("orig").get(); + auto ctext = elem.at(orig_lbl).get(); result = set_text(ctext); } - else if(elem.count("text")) + else if(elem.count(text_lbl)) { - auto ctext = elem.at("text").get(); + auto ctext = elem.at(text_lbl).get(); result = set_text(ctext); } else @@ -151,9 +177,9 @@ namespace andromeda return false; } - if(elem.count("word-tokens")) + if(elem.count(word_tokens_lbl)) { - const nlohmann::json& json_word_tokens = elem.at("word-tokens"); + const nlohmann::json& json_word_tokens = elem.at(word_tokens_lbl); andromeda::from_json(word_tokens, json_word_tokens); } @@ -195,17 +221,21 @@ namespace andromeda bool text_element::set_text(const std::string& ctext) { clear(); - + + //LOG_S(INFO) << ctext << " -> " << orig << " -> " << text; + orig = utils::strip(ctext); text = orig; + //LOG_S(INFO) << ctext << " -> " << orig << " -> " << text; + if(orig.size()==0) { return false; } - + len = orig.size(); - + text_valid = utf8::is_valid(orig.c_str(), orig.c_str()+len); text_hash = utils::to_reproducible_hash(orig); @@ -350,14 +380,15 @@ namespace andromeda { std::string tmp = char_tokens.at(j).str(); - if(constants::spaces.count(tmp) or - constants::brackets.count(tmp) or + if(constants::spaces.count(tmp) or + constants::brackets.count(tmp) or constants::punktuation.count(tmp) or - constants::numbers.count(tmp) ) - { - stop = true; - } - + constants::numbers.count(tmp)) + { + stop = true; + } + + if((not stop) or (j-i)==0) { dst += char_tokens.at(j).len(); @@ -371,6 +402,8 @@ namespace andromeda { stop = true; } + + //LOG_S(INFO) << stop << "\t" << tmp << "\t" << ss.str(); } std::string word = ss.str(); @@ -383,6 +416,42 @@ namespace andromeda char_l += dst; } + + // contract all pure numbers (0-9) into integers + auto curr = word_tokens.begin(); + auto prev = word_tokens.begin(); + while(curr != word_tokens.end()) + { + if(curr==word_tokens.begin()) + { + curr++; + } + else + { + auto prev_wrd = prev->get_word(); + auto curr_wrd = curr->get_word(); + + auto prev_char = prev_wrd.back(); + auto curr_char = curr_wrd.back(); + + if('0'<=prev_char and prev_char<='9' and + '0'<=curr_char and curr_char<='9' and + prev->get_rng(1)==curr->get_rng(0)) + { + prev_wrd += curr_wrd; + + word_token token(prev->get_rng(0), prev_wrd); + *prev = token; + + curr = word_tokens.erase(curr); + } + else + { + prev++; + curr++; + } + } + } } void text_element::contract_word_tokens() @@ -516,12 +585,12 @@ namespace andromeda std::string text_element::from_char_range(range_type char_range) { - std::size_t beg = char_range[0]; - std::size_t len = char_range[1]-beg; + std::size_t beg_ = char_range[0]; + std::size_t len_ = char_range[1]-beg_; if(char_range[1]<=text.size()) { - return text.substr(beg, len); + return text.substr(beg_, len_); } LOG_S(ERROR) << "char-range is out of bounds: text-length: " << text.size() diff --git a/src/andromeda/tooling/structs/elements/utils.h b/src/andromeda/tooling/structs/elements/utils.h index 23af9fc7..11c3cf40 100644 --- a/src/andromeda/tooling/structs/elements/utils.h +++ b/src/andromeda/tooling/structs/elements/utils.h @@ -14,7 +14,7 @@ namespace andromeda grid.push_back({}); for(auto& item:row) { - grid.back().push_back(item.text); + grid.back().push_back(item.get_text()); } } diff --git a/src/andromeda/tooling/structs/items/cls/base.h b/src/andromeda/tooling/structs/items/cls/base.h index c54e951e..430a8213 100644 --- a/src/andromeda/tooling/structs/items/cls/base.h +++ b/src/andromeda/tooling/structs/items/cls/base.h @@ -10,108 +10,200 @@ namespace andromeda public: const static inline std::string UNDEF = "__undef__"; - const static inline std::vector HEADERS = { "type", "label", "confidence"}; - + + const static inline std::vector HEADERS + = { "type", + "subj_hash", "subj_name", "subj_path", + "label", "confidence"}; + public: base_property(); - base_property(std::string type, - std::string name, - val_type conf); + base_property(hash_type subj_hash, // hash of the subject from which the entity comes + subject_name subj_name, + std::string subj_path, + model_name model, + std::string label, + val_type conf); + + hash_type get_subj_hash() const { return subj_hash; } + subject_name get_subj_name() const { return subj_name; } + std::string get_subj_path() const { return subj_path; } + + bool is_type(const std::string name) const { return (name==to_key(model)); } + bool is_label(const std::string label) const { return (label==this->label); } - std::string get_type() { return this->type; } - std::string get_name() { return this->name; } + bool is_model(const model_name name) const { return (name==model); } - float get_conf() { return this->conf; } + model_name get_model() const { return this->model; } + std::string get_type() const { return to_key(this->model); } + + std::string get_label() const { return this->label; } + float get_conf() const { return this->conf; } + + void set_label(const std::string label) { this->label=label; } + void set_conf(const float conf) { this->conf = conf; } - void set_name(const std::string& name) { this->name = name; } - void set_conf(const float& conf) { this->conf = conf; } - std::vector to_row(); - + nlohmann::json to_json(); nlohmann::json to_json_row(); bool from_json_row(const nlohmann::json& row); + friend bool operator==(const base_property& lhs, const base_property& rhs); friend bool operator<(const base_property& lhs, const base_property& rhs); - + private: - std::string type; - std::string name; - val_type conf; + hash_type subj_hash; // hash of the subject from which the entity comes + subject_name subj_name; + std::string subj_path; + + model_name model; + std::string label; + val_type conf; }; - + base_property::base_property(): - type(UNDEF), - name(UNDEF), + subj_hash(-1), + subj_name(TEXT), + subj_path("#"), + + model(NULL_MODEL), + label("UNDEF"), conf(0.0) {} - - base_property::base_property(std::string type, - std::string name, - val_type conf): - type(type), - name(name), + + base_property::base_property(hash_type subj_hash, + subject_name subj_name, + std::string subj_path, + model_name model, + std::string label, + val_type conf): + subj_hash(subj_hash), + subj_name(subj_name), + subj_path(subj_path), + + model(model), + label(label), conf(conf) {} std::vector base_property::to_row() { - std::vector row = { type, name, std::to_string(utils::round_conf(conf)) }; + std::vector row = { to_key(model), + std::to_string(subj_hash), to_string(subj_name), subj_path, + label, std::to_string(utils::round_conf(conf)) }; assert(row.size()==HEADERS.size()); - + return row; } - + nlohmann::json base_property::to_json() { nlohmann::json result = nlohmann::json::object(); { - result["type"] = type; - result["name"] = name; + result["type"] = to_key(model); + + result["subj_hash"] = subj_hash; + result["subj_name"] = to_string(subj_name); + result["subj_path"] = subj_path; + + result["label"] = label; result["confidence"] = utils::round_conf(conf); } - + return result; } nlohmann::json base_property::to_json_row() { - nlohmann::json row = nlohmann::json::array({ type, name, utils::round_conf(conf)}); + nlohmann::json row = nlohmann::json::array({ + to_key(model), + subj_hash, to_string(subj_name), subj_path, + label, utils::round_conf(conf)}); assert(row.size()==HEADERS.size()); - + return row; } - + bool base_property::from_json_row(const nlohmann::json& row) { if(row.size()>=HEADERS.size()) { - type = row[0].get(); - name = row[1].get(); - conf = row[2].get(); - - return true; + model = to_modelname(row[0].get()); + + subj_hash = row[1].get(); + subj_name = to_subject_name(row[2].get()); + subj_path = row[3].get(); + + label = row[4].get(); + conf = row[5].get(); + + return true; } - + return false; - } + } + + bool operator==(const base_property& lhs, + const base_property& rhs) + { + return ((lhs.subj_name==rhs.subj_name) and + (lhs.subj_path==rhs.subj_path) and + (lhs.model==rhs.model)); + } - bool operator<(const base_property& lhs, const base_property& rhs) + bool operator<(const base_property& lhs, + const base_property& rhs) { - if(lhs.type==rhs.type) + if(lhs.subj_name==rhs.subj_name) { - return lhs.conf>rhs.conf; + if(lhs.subj_path==rhs.subj_path) + { + if(lhs.model==rhs.model) + { + return lhs.conf>rhs.conf; + } + else + { + return (lhs.model0); + assert(orig.size()>0); + assert(char_range[0]<=char_range[1]); assert(ctok_range[0]<=ctok_range[1]); assert(wtok_range[0]<=wtok_range[1]); @@ -298,41 +345,11 @@ namespace andromeda wtok_range_match = (wtok_range[0] hash_vec = { subj_hash, @@ -344,7 +361,7 @@ namespace andromeda }; //LOG_S(INFO) << "'" << name << "' => ehash: " << ehash << " => ihash: " << ihash; - + ihash = utils::to_hash(hash_vec); } @@ -370,98 +387,18 @@ namespace andromeda } nlohmann::json base_instance::to_json_row() const - { - auto row = nlohmann::json::array({to_key(model_type), model_subtype, - subj_hash, to_string(subj_name), subj_path, - std::round(100.0*conf)/100.0, - ehash, ihash, - coor[0], coor[1], - char_range[0], char_range[1], - ctok_range[0], ctok_range[1], - wtok_range[0], wtok_range[1], - wtok_range_match, - name, orig}); - - assert(row.size()==headers().size()); - - return row; - } - - bool base_instance::from_json_row(const nlohmann::json& row) - { - if((not row.is_array()) or row.size()!=19) - { - LOG_S(ERROR) << "inconsistent entity-row: " << row.dump(); - return false; - } - - model_type = to_modelname(row.at(0).get()); - model_subtype = row.at(1).get(); - - subj_hash = row.at(2).get(); - subj_name = to_subject_name(row.at(3).get()); - subj_path = row.at(4).get(); - - conf = (row.at(5).get())/100.0; - //conf = (row.at(5).get())/100.0; - - ehash = row.at(6).get(); - ihash = row.at(7).get(); - - coor.at(0) = row.at(8).get(); - coor.at(1) = row.at(9).get(); - - char_range.at(0) = row.at(10).get(); - char_range.at(1) = row.at(11).get(); - - ctok_range.at(0) = row.at(12).get(); - ctok_range.at(1) = row.at(13).get(); - - wtok_range.at(0) = row.at(14).get(); - wtok_range.at(1) = row.at(15).get(); - - wtok_range_match = row.at(16).get(); - - name = row.at(17).get(); - orig = row.at(18).get(); - - return true; - } - - std::vector base_instance::headers(subject_name subj) - { - switch(subj) - { - case TEXT: - { - return TEXT_HEADERS; - } - break; - - case TABLE: - { - return TABLE_HEADERS; - } - break; - - default: - { - return HEADERS; - } - } - } - - nlohmann::json base_instance::to_json_row(subject_name subj) const { nlohmann::json row; - switch(subj) + switch(subj_name) { case TEXT: { row = nlohmann::json::array({to_key(model_type), model_subtype, - utils::round_conf(conf), - ehash, ihash, + subj_hash, to_string(subj_name), subj_path, + std::round(100.0*conf)/100.0, + ehash, ihash, + nlohmann::json::value_t::null, nlohmann::json::value_t::null, //coor[0], coor[1], char_range[0], char_range[1], ctok_range[0], ctok_range[1], wtok_range[0], wtok_range[1], @@ -473,267 +410,440 @@ namespace andromeda case TABLE: { row = nlohmann::json::array({to_key(model_type), model_subtype, - utils::round_conf(conf), - ehash, ihash, + subj_hash, to_string(subj_name), subj_path, + std::round(100.0*conf)/100.0, + ehash, ihash, coor[0], coor[1], char_range[0], char_range[1], ctok_range[0], ctok_range[1], wtok_range[0], wtok_range[1], wtok_range_match, name, orig}); - } - break; default: - { + { row = nlohmann::json::array({to_key(model_type), model_subtype, - utils::round_conf(conf), - ehash, ihash, + subj_hash, to_string(subj_name), subj_path, + std::round(100.0*conf)/100.0, + ehash, ihash, coor[0], coor[1], char_range[0], char_range[1], ctok_range[0], ctok_range[1], wtok_range[0], wtok_range[1], wtok_range_match, - name, orig}); - } - } - - if(row.size()!=headers(subj).size()) - { - LOG_S(ERROR); + name, orig}); + } } + assert(row.size()==headers().size()); return row; } - std::vector base_instance::short_text_headers() - { - return SHORT_TEXT_HEADERS; - } - - std::vector base_instance::short_table_headers() - { - return SHORT_TABLE_HEADERS; - } - - std::string base_instance::get_name() const - { - return name; - } + bool base_instance::from_json_row(const nlohmann::json& row) + { + if((not row.is_array()) or row.size()!=19) + { + LOG_S(ERROR) << "inconsistent entity-row: " << row.dump(); + return false; + } - std::string base_instance::get_reference() const - { - std::string ref = subj_path; + model_type = to_modelname(row.at(0).get()); + model_subtype = row.at(1).get(); - ref += std::to_string(ehash)+"_"+ - std::to_string(model_type)+"_coor_"+ - std::to_string(coor.at(0))+"-"+ - std::to_string(coor.at(1))+"_char_"+ - std::to_string(char_range.at(0))+"-"+ - std::to_string(char_range.at(1)); + subj_hash = row.at(2).get(); + subj_name = to_subject_name(row.at(3).get()); + subj_path = row.at(4).get(); - return ref; - } + conf = (row.at(5).get()); - nlohmann::json base_instance::to_json() const - { - nlohmann::json result = nlohmann::json::object(); - { - result["ehash"] = ehash; - result["ihash"] = ihash; + ehash = row.at(6).get(); + ihash = row.at(7).get(); - result["confidence"] = utils::round_conf(conf); + coor = DEFAULT_COOR; + if(not row.at(8).is_null()) + { + coor.at(0) = row.at(8).get(); + } + + if(not row.at(9).is_null()) + { + coor.at(1) = row.at(9).get(); + } + + char_range.at(0) = row.at(10).get(); + char_range.at(1) = row.at(11).get(); - result["model-type"] = to_key(model_type); - result["model-subtype"] = model_subtype; + ctok_range.at(0) = row.at(12).get(); + ctok_range.at(1) = row.at(13).get(); - result["name"] = name; - result["orig"] = orig; + wtok_range.at(0) = row.at(14).get(); + wtok_range.at(1) = row.at(15).get(); - result["coor"] = coor; - result["row-span"] = row_span; - result["col-span"] = col_span; + wtok_range_match = row.at(16).get(); - result["char-range"] = char_range; - result["ctok-range"] = ctok_range; - result["wtok-range"] = wtok_range; + name = row.at(17).get(); + orig = row.at(18).get(); - result["wtok-range-match"] = wtok_range_match; + return true; } - return result; - } + std::vector base_instance::headers(subject_name subj) + { + switch(subj) + { + case TEXT: + { + return TEXT_HEADERS; + } + break; + case TABLE: + { + return TABLE_HEADERS; + } + break; - std::vector base_instance::to_row(std::size_t col_width) - { - switch(subj_name) - { - case TEXT: - { - std::vector row = - { - to_key(model_type), - model_subtype, + default: + { + return HEADERS; + } + } + } - std::to_string(utils::round_conf(conf)), + nlohmann::json base_instance::to_json_row(subject_name subj) const + { + nlohmann::json row; - std::to_string(ehash), - std::to_string(ihash), + switch(subj) + { + case TEXT: + { + row = nlohmann::json::array({to_key(model_type), model_subtype, + utils::round_conf(conf), + ehash, ihash, + char_range[0], char_range[1], + ctok_range[0], ctok_range[1], + wtok_range[0], wtok_range[1], + wtok_range_match, + name, orig}); + } + break; - std::to_string(char_range[0]), - std::to_string(char_range[1]), + case TABLE: + { + row = nlohmann::json::array({to_key(model_type), model_subtype, + utils::round_conf(conf), + ehash, ihash, + coor[0], coor[1], + char_range[0], char_range[1], + ctok_range[0], ctok_range[1], + wtok_range[0], wtok_range[1], + wtok_range_match, + name, orig}); - wtok_range_match? "true":"false", + } + break; - utils::to_fixed_size(name, col_width), - utils::to_fixed_size(orig, col_width) - }; - assert(row.size()==SHORT_TEXT_HEADERS.size()); - return row; + default: + { + row = nlohmann::json::array({to_key(model_type), model_subtype, + utils::round_conf(conf), + ehash, ihash, + coor[0], coor[1], + char_range[0], char_range[1], + ctok_range[0], ctok_range[1], + wtok_range[0], wtok_range[1], + wtok_range_match, + name, orig}); + } } - break; - case TABLE: + if(row.size()!=headers(subj).size()) { - std::vector row = - { - to_key(model_type), - model_subtype, + LOG_S(ERROR); + } - std::to_string(utils::round_conf(conf)), + return row; + } - std::to_string(ehash), - std::to_string(ihash), + std::vector base_instance::short_text_headers() + { + return SHORT_TEXT_HEADERS; + } - std::to_string(coor[0]), - std::to_string(coor[1]), + std::vector base_instance::short_table_headers() + { + return SHORT_TABLE_HEADERS; + } - std::to_string(char_range[0]), - std::to_string(char_range[1]), + std::string base_instance::get_reference() const + { + std::string ref = subj_path; - wtok_range_match? "true":"false", + ref += std::to_string(ehash)+"_"+ + std::to_string(model_type)+"_coor_"+ + std::to_string(coor.at(0))+"-"+ + std::to_string(coor.at(1))+"_char_"+ + std::to_string(char_range.at(0))+"-"+ + std::to_string(char_range.at(1)); - utils::to_fixed_size(name, col_width), - utils::to_fixed_size(orig, col_width) - }; - assert(row.size()==SHORT_TABLE_HEADERS.size()); + return ref; + } - return row; - } - break; + nlohmann::json base_instance::to_json() const + { + nlohmann::json result = nlohmann::json::object(); + { + result["subj_hash"] = subj_hash; + result["subj_name"] = to_string(subj_name); + result["subj_path"] = subj_path; - default: - { - std::vector row = - { - to_key(model_type), - model_subtype, + result["ehash"] = ehash; + result["ihash"] = ihash; - std::to_string(utils::round_conf(conf)), + result["confidence"] = utils::round_conf(conf); - std::to_string(ehash), - std::to_string(ihash), + result["model-type"] = to_key(model_type); + result["model-subtype"] = model_subtype; - std::to_string(char_range[0]), - std::to_string(char_range[1]), + result["name"] = name; + result["orig"] = orig; - wtok_range_match? "true":"false", + result["coor"] = coor; + result["row-span"] = row_span; + result["col-span"] = col_span; - utils::to_fixed_size(name, col_width), - utils::to_fixed_size(orig, col_width) - }; - assert(row.size()==SHORT_TEXT_HEADERS.size()); + result["char-range"] = char_range; + result["ctok-range"] = ctok_range; + result["wtok-range"] = wtok_range; - return row; - } + result["wtok-range-match"] = wtok_range_match; } - } - std::vector base_instance::to_row(std::string& text, - std::size_t name_width, - std::size_t orig_width) - { - std::string tmp_0=name; + return result; + } - std::string tmp_1=orig; - if(tmp_1.size()==0) - { - tmp_1 = text.substr(char_range[0], char_range[1]-char_range[0]); - } + std::vector base_instance::to_row(std::size_t col_width) + { + switch(subj_name) + { + case TEXT: + { + std::vector row = + { + to_key(model_type), + model_subtype, - std::vector row = - { to_key(model_type), model_subtype, - std::to_string(utils::round_conf(conf)), - std::to_string(ehash), std::to_string(ihash), - std::to_string(char_range[0]), std::to_string(char_range[1]), - wtok_range_match? "true":"false", - utils::to_fixed_size(tmp_0, name_width), - utils::to_fixed_size(tmp_1, orig_width) - }; - assert(row.size()==SHORT_TEXT_HEADERS.size()); + std::to_string(utils::round_conf(conf)), - return row; - } + std::to_string(ehash), + std::to_string(ihash), - bool operator<(const base_instance& lhs, - const base_instance& rhs) - { - if(lhs.subj_path==rhs.subj_path) - { - if(lhs.coor[0]==rhs.coor[0]) + std::to_string(char_range[0]), + std::to_string(char_range[1]), + + wtok_range_match? "true":"false", + + utils::to_fixed_size(name, col_width), + utils::to_fixed_size(orig, col_width) + }; + assert(row.size()==SHORT_TEXT_HEADERS.size()); + return row; + } + break; + + case TABLE: { - if(lhs.coor[1]==rhs.coor[1]) + std::vector row = { - if(lhs.char_range[0]==rhs.char_range[0]) - { - if(lhs.char_range[1]==rhs.char_range[1]) - { - //LOG_S(INFO) << lhs.model_type << "\t" << rhs.model_type; - - const auto& ltype = lhs.model_type; - const auto& rtype = rhs.model_type; - - if(ltype==rtype) - { - const auto& lstype = lhs.model_subtype; - const auto& rstype = rhs.model_subtype; - - //LOG_S(INFO) << lhs.model_subtype << "\t" << rhs.model_subtype; - return ((lstype.compare(rstype))<0); - } - else - { - return (ltyperhs.char_range[1]; - } - } - else - { - return lhs.char_range[0] row = + { + to_key(model_type), + model_subtype, + + std::to_string(utils::round_conf(conf)), + + std::to_string(ehash), + std::to_string(ihash), + + std::to_string(char_range[0]), + std::to_string(char_range[1]), + + wtok_range_match? "true":"false", + + utils::to_fixed_size(name, col_width), + utils::to_fixed_size(orig, col_width) + }; + assert(row.size()==SHORT_TEXT_HEADERS.size()); + + return row; } - } - else - { - return (lhs.subj_path==rhs.subj_path); - } - } + } + } + + std::vector base_instance::to_row(std::string& text, + std::size_t name_width, + std::size_t orig_width) + { + std::string tmp_0=name; + + std::string tmp_1=orig; + if(tmp_1.size()==0) + { + tmp_1 = text.substr(char_range[0], char_range[1]-char_range[0]); + } + + std::vector row = + { to_key(model_type), model_subtype, + std::to_string(utils::round_conf(conf)), + std::to_string(ehash), std::to_string(ihash), + std::to_string(char_range[0]), std::to_string(char_range[1]), + wtok_range_match? "true":"false", + utils::to_fixed_size(tmp_0, name_width), + utils::to_fixed_size(tmp_1, orig_width) + }; + assert(row.size()==SHORT_TEXT_HEADERS.size()); + + return row; + } + + bool operator==(const base_instance& lhs, + const base_instance& rhs) + { + return ((lhs.model_type==rhs.model_type) and + (lhs.model_subtype==rhs.model_subtype) and + (lhs.subj_name==rhs.subj_name) and + (lhs.subj_path==rhs.subj_path) and + (lhs.coor[0]==rhs.coor[0]) and + (lhs.coor[1]==rhs.coor[1]) and + (lhs.char_range[0]==rhs.char_range[0]) and + (lhs.char_range[1]==rhs.char_range[1])); + } -} + bool operator<(const base_instance& lhs, + const base_instance& rhs) + { + if(lhs.subj_name==rhs.subj_name) + { + if(lhs.subj_path==rhs.subj_path) + { + if(lhs.coor[0]==rhs.coor[0]) + { + if(lhs.coor[1]==rhs.coor[1]) + { + if(lhs.char_range[0]==rhs.char_range[0]) + { + if(lhs.char_range[1]==rhs.char_range[1]) + { + if(lhs.model_type==rhs.model_type) + { + return (lhs.model_subtyperhs.char_range[1]; + } + } + else + { + return lhs.char_range[0]rhs.char_range[1]; } return lhs.char_range[0]rhs.get_char_range(1); + } + return lhs.get_char_range(0) header = base_instance::short_text_headers(); diff --git a/src/andromeda/tooling/structs/items/rel/base.h b/src/andromeda/tooling/structs/items/rel/base.h index c2ad16ac..b15a02d3 100644 --- a/src/andromeda/tooling/structs/items/rel/base.h +++ b/src/andromeda/tooling/structs/items/rel/base.h @@ -32,18 +32,18 @@ namespace andromeda const base_instance& inst_i, const base_instance& inst_j); + friend bool operator<(const base_relation& lhs, const base_relation& rhs); + nlohmann::json to_json_row(); bool from_json_row(const nlohmann::json& row); std::vector to_row(std::size_t col_width); + std::string get_type() { return to_name(flvr); } std::string get_name() { return to_name(flvr); } hash_type get_hash_i() { return hash_i; } hash_type get_hash_j() { return hash_j; } - - //hash_type get_ihash_i() { return ihash_i; } - //hash_type get_ihash_j() { return ihash_j; } private: @@ -51,9 +51,6 @@ namespace andromeda val_type conf; hash_type hash_i, hash_j; - //hash_type hash_i, ihash_i; - //hash_type hash_j, ihash_j; - std::string name_i, name_j; }; @@ -127,15 +124,34 @@ namespace andromeda flvr(to_flvr(name)), conf(conf), - hash_i(inst_i.ehash), - //ihash_i(inst_i.ihash), + //hash_i(inst_i.get_ehash()), + hash_i(inst_i.get_ihash()), - hash_j(inst_j.ehash), - //ihash_j(inst_j.ihash), + //hash_j(inst_j.get_ehash()), + hash_j(inst_j.get_ihash()), - name_i(inst_i.name), - name_j(inst_j.name) + name_i(inst_i.get_name()), + name_j(inst_j.get_name()) {} + + bool operator<(const base_relation& lhs, const base_relation& rhs) + { + if(lhs.flvr==rhs.flvr) + { + if(lhs.hash_i==rhs.hash_i) + { + return (rhs.hash_i implicit_models = {"lapos"}; public: @@ -55,6 +57,12 @@ namespace andromeda virtual ~base_subject() {} + std::string get_self_ref(); + void set_self_ref(std::string sref); + + bool is_valid() const { return valid; } + void set_valid(bool val) { this->valid=val; } + static bool set_prov_refs(const nlohmann::json& data, const std::vector >& doc_provs, std::vector >& base_provs); @@ -103,22 +111,26 @@ namespace andromeda std::string key, std::vector >& vals); - public: - + //public: + protected: + bool valid; subject_name name; hash_type hash; // hash of the item hash_type dhash; // hash of the document of the item - + std::string dloc; // location of item in the document # - + std::string sref; + + public: + std::set applied_models; std::vector properties; std::vector instances; std::vector relations; - + //std::vector entities; }; @@ -129,8 +141,9 @@ namespace andromeda hash(-1), dhash(-1), - dloc(""), - + dloc("#"), + sref("#"), + applied_models({}), properties({}), @@ -145,8 +158,9 @@ namespace andromeda hash(-1), dhash(-1), - dloc(""), - + dloc("#"), + sref("#"), + applied_models({}), properties({}), @@ -156,7 +170,7 @@ namespace andromeda base_subject::base_subject(uint64_t dhash, std::string dloc, - subject_name name)://, prov_element& prov): + subject_name name): valid(true), name(name), @@ -164,14 +178,46 @@ namespace andromeda dhash(dhash), dloc(dloc), - + sref("#"), + applied_models({}), properties({}), instances({}), relations({}) - {} + { + auto parts = utils::split(dloc, "#"); + if(parts.size()==2) + { + sref += parts.at(1); + } + else + { + LOG_S(WARNING) << "could not derive sref from dloc: " << dloc; + } + } + void base_subject::set_self_ref(std::string sref) + { + this->sref = sref; + } + + std::string base_subject::get_self_ref() + { + return sref; + /* + if(dloc=="#") + { + return dloc; + } + + auto parts = utils::split(dloc, "#"); + assert(parts.size()==2); + + return ("#"+parts.at(1)); + */ + } + bool base_subject::set_prov_refs(const nlohmann::json& data, const std::vector >& doc_provs, std::vector >& base_provs) @@ -203,7 +249,6 @@ namespace andromeda if(prov!=NULL) { nlohmann::json pref; - //pref[base_subject::jref_lbl] = prov->get_pref(); pref[base_subject::jref_lbl] = prov->get_self_ref(); result.push_back(pref); @@ -241,8 +286,9 @@ namespace andromeda nlohmann::json result = nlohmann::json::object({}); { - result[hash_lbl] = hash; + result[subj_hash_lbl] = hash; result[dloc_lbl] = dloc; + result[sref_lbl] = sref; } if((properties.size()>0) and (filters.size()==0 or filters.count(prps_lbl))) @@ -265,6 +311,11 @@ namespace andromeda if(filters.size()==0 or filters.count(applied_models_lbl)) { + for(auto implicit_model:implicit_models) + { + applied_models.erase(implicit_model); + } + result[applied_models_lbl] = applied_models; } @@ -292,14 +343,10 @@ namespace andromeda bool base_subject::_from_json(const nlohmann::json& item) { - hash = item.value(hash_lbl, hash); - dloc = item.value(dloc_lbl, dloc); + hash = item.value(subj_hash_lbl, hash); - applied_models.clear(); - if(item.count(applied_models_lbl)) - { - applied_models = item.value(applied_models_lbl, applied_models); - } + dloc = item.value(dloc_lbl, dloc); + sref = item.value(sref_lbl, sref); bool read_props=true, read_insts=true, read_rels=true; @@ -324,6 +371,35 @@ namespace andromeda read_rels = andromeda::from_json(relations, rels); } + applied_models.clear(); + if(item.count(applied_models_lbl)) + { + applied_models = item.value(applied_models_lbl, applied_models); + } + else + { + for(auto& prop:properties) + { + applied_models.insert(prop.get_type()); + } + + for(auto& inst:instances) + { + applied_models.insert(inst.get_type()); + } + + for(auto& rel:relations) + { + applied_models.insert(rel.get_type()); + } + + } + + for(auto implicit_model:implicit_models) + { + applied_models.erase(implicit_model); + } + return (read_props and read_insts and read_rels); } @@ -349,7 +425,7 @@ namespace andromeda { nlohmann::json& json_vals = result[key]; json_vals = nlohmann::json::array({}); - + for(auto& val:vals) { json_vals.push_back(val->to_json(filters)); diff --git a/src/andromeda/tooling/structs/subjects/document.h b/src/andromeda/tooling/structs/subjects/document.h index e075fc85..32c7bae8 100644 --- a/src/andromeda/tooling/structs/subjects/document.h +++ b/src/andromeda/tooling/structs/subjects/document.h @@ -34,10 +34,6 @@ namespace andromeda // element-labels const static inline std::string pdforder_lbl = "pdf-order"; - //const static inline std::string prov_lbl = "prov"; - //const static inline std::string text_lbl = "text"; - //const static inline std::string data_lbl = "data"; - const static inline std::string maintext_name_lbl = name_lbl; const static inline std::string maintext_type_lbl = type_lbl; @@ -70,6 +66,15 @@ namespace andromeda uint64_t get_hash() const { return doc_hash; } std::string get_name() const { return doc_name; } + std::filesystem::path get_filepath() { return filepath; } + + nlohmann::json& get_orig() { return orig; } + + std::vector >& get_pages() { return pages; } + std::vector >& get_provs() { return provs; } + + + void show(bool txt=true, bool mdls=false, bool ctokens=false, bool wtokens=true, bool prps=true, bool insts=true, bool rels=true); @@ -87,6 +92,12 @@ namespace andromeda void init_provs(); void show_provs(); + private: + + void join_properties(); + void join_instances(); + void join_applied_models(); + private: void set_kept(const nlohmann::json& data); @@ -106,7 +117,7 @@ namespace andromeda bool finalise_instances(); bool finalise_relations(); - public: + private: std::filesystem::path filepath; @@ -126,6 +137,8 @@ namespace andromeda std::vector > body; std::vector > meta; + public: + std::vector > > texts; std::vector > > tables; std::vector > > figures; @@ -185,9 +198,9 @@ namespace andromeda auto& desc = result.at("description"); for(auto& prop:properties) { - if(prop.get_type()=="language") + if(prop.is_type("language")) { - std::vector langs = {prop.get_name()}; + std::vector langs = { prop.get_label() }; desc["languages"] = langs; } } @@ -236,8 +249,7 @@ namespace andromeda std::set doc_filters = { "hash", "dloc", "prov", "text", "data", - "captions", "footnotes", "mentions", - "properties"}; + "captions", "footnotes", "mentions"}; base_subject::to_json(result, texts_lbl, texts, doc_filters); base_subject::to_json(result, tables_lbl, tables, doc_filters); @@ -261,19 +273,26 @@ namespace andromeda base_subject::from_json(doc, pages_lbl, pages); base_subject::from_json(doc, provs_lbl, provs); - base_subject::from_json(doc, provs, texts_lbl , texts ); - base_subject::from_json(doc, provs, tables_lbl , tables ); + base_subject::from_json(doc, provs, texts_lbl, texts); + base_subject::from_json(doc, provs, tables_lbl, tables); base_subject::from_json(doc, provs, figures_lbl, figures); - + base_subject::from_json(doc, provs, page_headers_lbl, page_headers); base_subject::from_json(doc, provs, page_footers_lbl, page_footers); base_subject::from_json(doc, provs, footnotes_lbl, footnotes); base_subject::from_json(doc, provs, other_lbl, other); + + { + join_properties(); + join_instances(); + + join_applied_models(); + } return true; } - + bool subject::from_json(const nlohmann::json& item, const std::vector >& doc_provs) { @@ -470,142 +489,172 @@ namespace andromeda return (valid_props and valid_insts and valid_rels); } - + bool subject::finalise_properties() { - std::map property_total; - std::map, val_type> property_label_mapping; - + //LOG_S(INFO) << "#-properties: " << properties.size(); + + // only keep document global properties + //std::set > doc_properties={}; + //for(auto& prop:properties) + //{ + //doc_properties.insert({prop.get_subj_hash(), prop.get_model()}); + //} + for(auto& text:texts) { for(auto& prop:text->properties) { - std::string mdl = prop.get_type(); - std::string lbl = prop.get_name(); - - val_type conf = prop.get_conf(); - val_type dst = text->dst; - - if(property_total.count(mdl)==1) - { - property_total[mdl] += dst; - } - else - { - property_total[mdl] = dst; - } - - std::pair key={mdl,lbl}; - if(property_label_mapping.count(key)==1) - { - property_label_mapping[key] += dst*conf; - } - else - { - property_label_mapping[key] = dst*conf; - } - } + //std::pair key({prop.get_subj_hash(), prop.get_model()}); + //if(doc_properties.count(key)==0) + //{ + properties.push_back(prop); + //} + } + text->properties.clear(); } - properties.clear(); - for(auto itr=property_label_mapping.begin(); itr!=property_label_mapping.end(); itr++) + for(auto& table:tables) { - std::string mdl = (itr->first).first; - itr->second /= (property_total.at(mdl)); - - base_property prop((itr->first).first, (itr->first).second, itr->second); - properties.push_back(prop); - } - - //LOG_S(INFO) << "properties: \n\n" << tabulate(properties); - - std::sort(properties.begin(), properties.end()); - - //LOG_S(INFO) << "properties: \n\n" << tabulate(properties); + for(auto& prop:table->properties) + { + //std::pair key({prop.get_subj_hash(), prop.get_model()}); + //if(doc_properties.count(key)==0) + //{ + properties.push_back(prop); + //} + } + table->properties.clear(); + } - for(auto itr=properties.begin(); itr!=properties.end(); ) + for(auto& figure:figures) { - auto next = itr; - next++; - - if(itr==properties.end() or next==properties.end()) - { - break; - } - else if(itr->get_type()==next->get_type()) + for(auto& prop:figure->properties) { - properties.erase(next); - } - else - { - itr++; - } - } + //std::pair key({prop.get_subj_hash(), prop.get_model()}); + //if(doc_properties.count(key)==0) + //{ + properties.push_back(prop); + //} + } + figure->properties.clear(); + } + //LOG_S(INFO) << "#-properties: " << properties.size(); + + std::sort(properties.begin(), properties.end()); + + auto itr = std::unique(properties.begin(), properties.end()); + properties.erase(itr, properties.end()); + + //LOG_S(INFO) << "#-properties: " << properties.size(); + return true; } bool subject::finalise_instances() { + //LOG_S(INFO) << "#-instances: " << instances.size(); + instances.clear(); + //LOG_S(INFO) << "#-instances: " << instances.size(); + for(auto& subj:texts) { - //LOG_S(INFO) << __FUNCTION__ << ": " << subj.instances.size(); - + //LOG_S(INFO) << "#-instances " << subj->get_self_ref() << ": " << subj->instances.size(); + for(auto& ent:subj->instances) { - instances.emplace_back(subj->get_hash(), - subj->get_name(), - subj->get_path(), - ent); + instances.push_back(ent); + + //if(ent.get_subj_path()=="") + //{ + //LOG_S(INFO) << ent.to_json().dump(); + //} } } - //LOG_S(INFO) << "total #-insts: " << instances.size(); - + //LOG_S(INFO) << " texts #-instances: " << instances.size(); + for(auto& subj:tables) { + //LOG_S(INFO) << "#-instances " << subj->get_self_ref() << ": " << subj->instances.size(); for(auto& ent:subj->instances) { - instances.emplace_back(subj->get_hash(), - subj->get_name(), - subj->get_path(), - ent); + instances.push_back(ent); + + //if(ent.get_subj_path()=="") + //{ + //LOG_S(INFO) << " => " << ent.to_json().dump(); + //} } for(auto& capt:subj->captions) { + //LOG_S(INFO) << "#-instances " << capt->get_self_ref() << ": " << capt->instances.size(); + for(auto& ent:capt->instances) { - instances.emplace_back(capt->get_hash(), - capt->get_name(), - capt->get_path(), - ent); + instances.push_back(ent); + + //if(ent.get_subj_path()=="") + //{ + //LOG_S(INFO) << ent.to_json().dump(); + //} } } } - + //LOG_S(INFO) << "tables #-instances: " << instances.size(); + for(auto& subj:figures) { + //LOG_S(INFO) << "#-instances " << subj->get_self_ref() << ": " << subj->instances.size(); + for(auto& ent:subj->instances) { - instances.emplace_back(subj->get_hash(), - subj->get_name(), - subj->get_path(), - ent); + instances.push_back(ent); } for(auto& capt:subj->captions) { + //LOG_S(INFO) << "#-instances " << capt->get_self_ref() << ": " << capt->instances.size(); + for(auto& ent:capt->instances) { - instances.emplace_back(capt->get_hash(), - capt->get_name(), - capt->get_path(), - ent); + instances.push_back(ent); } } } + //LOG_S(INFO) << "figures #-instances: " << instances.size(); + + //for(auto& ent:instances) + //{ + //if(ent.get_subj_path()=="") + //{ + //LOG_S(INFO) << ent.to_json().dump(); + //} + //} + + //LOG_S(INFO) << "#-instances: " << instances.size(); + + std::sort(instances.begin(), instances.end()); + + /* + for(std::size_t l=0; l+1captions) + { + for(auto& rel:capt->relations) + { + relations.push_back(rel); + } + } } + for(auto& figure:figures) + { + for(auto& rel:figure->relations) + { + relations.push_back(rel); + } + + for(auto& capt:figure->captions) + { + for(auto& rel:capt->relations) + { + relations.push_back(rel); + } + } + } + + std::sort(relations.begin(), relations.end()); + return true; } + void subject::join_properties() + { + for(auto& text:texts) { text->properties.clear(); } + for(auto& table:tables) { table->properties.clear(); } + for(auto& figure:figures) { figure->properties.clear(); } + + for(auto& prop:this->properties) + { + std::string path = prop.get_subj_path(); + + auto parts = utils::split(path, "/"); + + if(parts.size()==1) // document properties, nothing to be done ... + {} + else if(parts.size()==3 and parts.at(1)==texts_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(texts.at(ind)->get_hash()==prop.get_subj_hash()); + texts.at(ind)->properties.push_back(prop); + texts.at(ind)->applied_models.insert(prop.get_type()); + } + else if(parts.size()==3 and parts.at(1)==tables_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(tables.at(ind)->get_hash()==prop.get_subj_hash()); + tables.at(ind)->properties.push_back(prop); + tables.at(ind)->applied_models.insert(prop.get_type()); + } + else if(parts.size()==3 and parts.at(1)==figures_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(figures.at(ind)->get_hash()==prop.get_subj_hash()); + figures.at(ind)->properties.push_back(prop); + figures.at(ind)->applied_models.insert(prop.get_type()); + } + else if(parts.size()==5 and parts.at(1)==tables_lbl and parts.at(3)==captions_lbl) + { + int ti = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(tables.at(ti)->get_hash()==prop.get_subj_hash()); + tables.at(ti)->captions.at(ci)->properties.push_back(prop); + tables.at(ti)->captions.at(ci)->applied_models.insert(prop.get_type()); + } + else if(parts.size()==5 and parts.at(1)==figures_lbl and parts.at(3)==captions_lbl) + { + int fi = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(figures.at(fi)->get_hash()==prop.get_subj_hash()); + figures.at(fi)->captions.at(ci)->properties.push_back(prop); + figures.at(fi)->captions.at(ci)->applied_models.insert(prop.get_type()); + } + else + { + LOG_S(WARNING) << "ignoring properties with subj-path: " << path; + } + } + } + + void subject::join_instances() + { + for(auto& text:texts) { text->instances.clear(); } + for(auto& table:tables) { table->instances.clear(); } + for(auto& figure:figures) { figure->instances.clear(); } + + for(auto& inst:this->instances) + { + std::string path = inst.get_subj_path(); + + auto parts = utils::split(path, "/"); + + if(parts.size()==1) // document instances, nothing to be done ... + {} + else if(parts.size()==3 and parts.at(1)==texts_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(texts.at(ind)->get_hash()==inst.get_subj_hash()); + texts.at(ind)->instances.push_back(inst); + texts.at(ind)->applied_models.insert(inst.get_type()); + } + else if(parts.size()==3 and parts.at(1)==tables_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(tables.at(ind)->get_hash()==inst.get_subj_hash()); + + assert(inst.get_name().size()>0); + + tables.at(ind)->instances.push_back(inst); + tables.at(ind)->applied_models.insert(inst.get_type()); + } + else if(parts.size()==3 and parts.at(1)==figures_lbl) + { + int ind = std::stoi(parts.at(2)); + + assert(figures.at(ind)->get_hash()==inst.get_subj_hash()); + + figures.at(ind)->instances.push_back(inst); + figures.at(ind)->applied_models.insert(inst.get_type()); + } + else if(parts.size()==5 and parts.at(1)==tables_lbl and parts.at(3)==captions_lbl) + { + int ti = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(tables.at(ti)->captions.at(ci)->get_hash()==inst.get_subj_hash()); + + tables.at(ti)->captions.at(ci)->instances.push_back(inst); + tables.at(ti)->captions.at(ci)->applied_models.insert(inst.get_type()); + } + else if(parts.size()==5 and parts.at(1)==figures_lbl and parts.at(3)==captions_lbl) + { + int fi = std::stoi(parts.at(2)); + int ci = std::stoi(parts.at(4)); + + assert(figures.at(fi)->captions.at(ci)->get_hash()==inst.get_subj_hash()); + + figures.at(fi)->captions.at(ci)->instances.push_back(inst); + figures.at(fi)->captions.at(ci)->applied_models.insert(inst.get_type()); + } + else + { + LOG_S(WARNING) << "ignoring instances with subj-path: " << path; + } + } + } + + void subject::join_applied_models() + { + for(auto& text:texts) + { + text->applied_models = this->applied_models; + } + + for(auto& table:tables) + { + table->applied_models = this->applied_models; + + for(auto& capt:table->captions) + { + capt->applied_models = this->applied_models; + } + } + + for(auto& figure:figures) + { + figure->applied_models = this->applied_models; + + for(auto& capt:figure->captions) + { + capt->applied_models = this->applied_models; + } + } + } + } #endif diff --git a/src/andromeda/tooling/structs/subjects/document/doc_captions.h b/src/andromeda/tooling/structs/subjects/document/doc_captions.h index d0f37483..05217b46 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_captions.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_captions.h @@ -71,7 +71,7 @@ namespace andromeda obj_to_caption={}; obj_to_notes={}; - auto& provs = doc.provs; + auto& provs = doc.get_provs(); page_nums={}; is_assigned={}; @@ -169,7 +169,7 @@ namespace andromeda ind_type prov_ind = prov_to_index.at(prov); ind_type page_num = prov->get_page(); - std::string text = elem->text; + std::string text = elem->get_text(); text = utils::to_lower(text); text = utils::strip(text); @@ -275,7 +275,7 @@ namespace andromeda template void doc_captions::assign_captions(doc_type& doc) { - auto& provs = doc.provs; + auto& provs = doc.get_provs(); for(auto itr=obj_to_caption.begin(); itr!=obj_to_caption.end(); itr++) { @@ -300,20 +300,11 @@ namespace andromeda { auto& table = prov_to_table.at(prov_i); - - //LOG_S(WARNING) << "table: " - //<< prov_i->maintext_ind; - for(ind_type j:itr->second) { auto& prov_j = provs.at(j); auto& caption = prov_to_text.at(prov_j); - //LOG_S(WARNING) << "\tassigning caption " - //<< prov_i->maintext_ind - //<< " to table " - //<< prov_j->maintext_ind; - table->captions.push_back(caption); } } @@ -321,18 +312,10 @@ namespace andromeda { auto& figure = prov_to_figure.at(prov_i); - //LOG_S(WARNING) << "figure: " - //<< prov_i->maintext_ind; - for(ind_type j:itr->second) { auto& prov_j = provs.at(j); auto& caption = prov_to_text.at(prov_j); - - //LOG_S(WARNING) << "\tassigning caption " - //<< prov_i->maintext_ind - //<< " to figure " - //<< prov_j->maintext_ind; figure->captions.push_back(caption); } diff --git a/src/andromeda/tooling/structs/subjects/document/doc_maintext.h b/src/andromeda/tooling/structs/subjects/document/doc_maintext.h index 146cc0db..fa7c951c 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_maintext.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_maintext.h @@ -84,8 +84,8 @@ namespace andromeda auto& curr_prov = curr->provs.back(); auto& next_prov = next->provs.front(); - auto& curr_text = curr->text; - auto& next_text = next->text; + std::string curr_text = curr->get_text(); + std::string next_text = next->get_text(); if(curr_prov->get_type()!="paragraph" or next_prov->get_type()!="paragraph" or @@ -105,7 +105,7 @@ namespace andromeda (jump_col or jump_page)) { curr->concatenate(next); - next->valid=false; + next->set_valid(false); } } @@ -113,7 +113,7 @@ namespace andromeda auto itr=texts.begin(); while(itr!=texts.end()) { - if((*itr)->valid) + if((*itr)->is_valid()) { itr++; } diff --git a/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h b/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h index 779f26c4..9cac97b3 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_normalisation.h @@ -13,7 +13,6 @@ namespace andromeda const static inline std::set is_text = { "title", "subtitle-level-1", "paragraph", "list-item", - //"footnote", "caption", "formula", "equation" }; @@ -84,7 +83,7 @@ namespace andromeda template void doc_normalisation::set_pdforder() { - auto& orig = doc.orig; + auto& orig = doc.get_orig(); if(orig.count(doc_type::maintext_lbl)==0) { @@ -102,9 +101,9 @@ namespace andromeda template void doc_normalisation::init_pages() { - auto& orig = doc.orig; - - auto& pages = doc.pages; + auto& orig = doc.get_orig(); + + auto& pages = doc.get_pages(); pages.clear(); for(ind_type l=0; l void doc_normalisation::unroll_provs() { - auto& orig = doc.orig; + auto& orig = doc.get_orig(); nlohmann::json& old_maintext = orig.at(doc_type::maintext_lbl); nlohmann::json new_maintext = nlohmann::json::array({}); @@ -191,10 +190,11 @@ namespace andromeda template void doc_normalisation::init_provs() { - std::string doc_name = doc.doc_name; + //std::string doc_name = doc.doc_name; + std::string doc_name = doc.get_name(); - auto& orig = doc.orig; - auto& provs = doc.provs; + auto& orig = doc.get_orig(); + auto& provs = doc.get_provs(); provs.clear(); @@ -273,10 +273,11 @@ namespace andromeda template void doc_normalisation::init_items() { - std::string doc_name = doc.doc_name; + //std::string doc_name = doc.doc_name; + std::string doc_name = doc.get_name(); - auto& orig = doc.orig; - auto& provs = doc.provs; + auto& orig = doc.get_orig(); + auto& provs = doc.get_provs(); auto& texts = doc.texts; auto& tables = doc.tables; @@ -325,7 +326,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -344,7 +345,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); tables.push_back(subj); @@ -365,7 +366,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); figures.push_back(subj); @@ -382,7 +383,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -401,7 +402,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -420,7 +421,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -445,7 +446,7 @@ namespace andromeda std::string dloc = ss.str(); - auto subj = std::make_shared >(doc.doc_hash, dloc, prov); + auto subj = std::make_shared >(doc.get_hash(), dloc, prov); bool valid = subj->set_data(item); if(valid) @@ -480,27 +481,89 @@ namespace andromeda void doc_normalisation::resolve_paths() { auto& texts = doc.texts; + + auto& footnotes = doc.footnotes; + auto& page_headers = doc.page_headers; + auto& page_footers = doc.page_footers; + auto& other = doc.other; + auto& tables = doc.tables; auto& figures = doc.figures; for(index_type l=0; lset_self_ref(ss.str()); + for(auto& prov:texts.at(l)->provs) { - std::stringstream ss; - ss << "#/" << doc_type::texts_lbl << "/" << l; + prov->set_item_ref(ss.str()); + } + } + + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:footnotes.at(l)->provs) + { + prov->set_item_ref(ss.str()); + } + } + + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:page_headers.at(l)->provs) + { + prov->set_item_ref(ss.str()); + } + } + + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:page_footers.at(l)->provs) + { prov->set_item_ref(ss.str()); } } + for(index_type l=0; lset_self_ref(ss.str()); + + for(auto& prov:other.at(l)->provs) + { + prov->set_item_ref(ss.str()); + } + } + for(index_type l=0; lset_self_ref(ss.str()); + for(auto& prov:tables.at(l)->provs) { - std::stringstream ss; - ss << "#/" << doc_type::tables_lbl << "/" << l; - prov->set_item_ref(ss.str()); } @@ -514,17 +577,21 @@ namespace andromeda << doc_type::captions_lbl << "/" << k; prov->set_item_ref(ss.str()); + + tables.at(l)->captions.at(k)->set_self_ref(ss.str()); } } } for(index_type l=0; lset_self_ref(ss.str()); + for(auto& prov:figures.at(l)->provs) { - std::stringstream ss; - ss << "#/" << doc_type::figures_lbl << "/" << l; - prov->set_item_ref(ss.str()); } @@ -538,6 +605,8 @@ namespace andromeda << doc_type::captions_lbl << "/" << k; prov->set_item_ref(ss.str()); + + figures.at(l)->captions.at(k)->set_self_ref(ss.str()); } } } diff --git a/src/andromeda/tooling/structs/subjects/document/doc_order.h b/src/andromeda/tooling/structs/subjects/document/doc_order.h index d135e0d0..de95f8dc 100644 --- a/src/andromeda/tooling/structs/subjects/document/doc_order.h +++ b/src/andromeda/tooling/structs/subjects/document/doc_order.h @@ -84,10 +84,10 @@ namespace andromeda template void doc_order::order_maintext(doc_type& doc) - { + { // make a deep-copy ! prov_vec_type provs={}; - for(auto& prov:doc.provs) + for(auto& prov:doc.get_provs()) { provs.push_back(*prov); } @@ -100,17 +100,19 @@ namespace andromeda template void doc_order::update_document(doc_type& doc, prov_vec_type& provs) { + nlohmann::json& orig = doc.get_orig(); + // copy ... - nlohmann::json maintext = doc.orig["main-text"]; + nlohmann::json maintext = orig["main-text"]; // re-order for(std::size_t l=0; l >& doc_provs); - std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + //std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + bool is_valid() { return (base_subject::valid); } bool set_data(const nlohmann::json& data); @@ -38,10 +39,12 @@ namespace andromeda void set_hash(); - public: + private: sval_type conf; std::string created_by; + + public: std::vector > provs; @@ -135,8 +138,11 @@ namespace andromeda bool subject
::from_json(const nlohmann::json& json_figure) { - base_subject::valid = true; - + { + base_subject::valid = true; + base_subject::_from_json(json_figure); + } + { conf = json_figure.value(base_subject::confidence_lbl, conf); created_by = json_figure.value(base_subject::created_by_lbl, created_by); @@ -169,6 +175,18 @@ namespace andromeda bool subject
::set_tokens(std::shared_ptr char_normaliser, std::shared_ptr text_normaliser) { + valid = true; + + for(auto& caption:captions) + { + caption->set_tokens(char_normaliser, text_normaliser); + } + + for(auto& footnote:footnotes) + { + footnote->set_tokens(char_normaliser, text_normaliser); + } + return true; } diff --git a/src/andromeda/tooling/structs/subjects/table.h b/src/andromeda/tooling/structs/subjects/table.h index 0e863440..b91d99d3 100644 --- a/src/andromeda/tooling/structs/subjects/table.h +++ b/src/andromeda/tooling/structs/subjects/table.h @@ -24,7 +24,8 @@ namespace andromeda void clear(); - std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + //std::string get_path() const { return (provs.size()>0? (provs.at(0)->get_item_ref()):"#"); } + bool is_valid() { return (base_subject::valid); } virtual nlohmann::json to_json(const std::set& filters); @@ -64,10 +65,12 @@ namespace andromeda bool is_legacy(const nlohmann::json& grid); - public: + private: sval_type conf; std::string created_by; + + public: std::vector > provs; @@ -207,12 +210,22 @@ namespace andromeda bool subject
::from_json(const nlohmann::json& json_table) { + //LOG_S(INFO) << __FUNCTION__; + + { + base_subject::valid = true; + base_subject::_from_json(json_table); + } + { conf = json_table.value(base_subject::confidence_lbl, conf); created_by = json_table.value(base_subject::created_by_lbl, created_by); } - { + { + nrows = json_table.at("#-rows"); + ncols = json_table.at("#-cols"); + nlohmann::json grid = json_table.at("data"); for(ind_type i=0; i::set_data(const nlohmann::json& item) { base_subject::clear_models(); + data.clear(); { @@ -263,8 +277,6 @@ namespace andromeda data.push_back({}); for(ind_type j=0; j(); bbox[2] = coor.at(2).get(); bbox[3] = coor.at(3).get(); - - //LOG_S(INFO) << bbox[0] << ", " - //<< bbox[1] << ", " - //<< bbox[2] << ", " - //<< bbox[3]; } std::array row_span={i,i+1}; @@ -376,7 +383,7 @@ namespace andromeda { for(std::size_t j=0; jset_tokens(char_normaliser, text_normaliser); + } + + for(auto& footnote:footnotes) + { + footnote->set_tokens(char_normaliser, text_normaliser); + } + for(auto& row:data) { for(auto& cell:row) @@ -395,7 +412,7 @@ namespace andromeda valid = (valid and cell.set_tokens(char_normaliser, text_normaliser)); } } - + return valid; } @@ -419,8 +436,8 @@ namespace andromeda range_type min_range = {0, 0}; table_range_type table_min_range = {0, 0}; - base_instance fake(base_subject::hash, NULL_MODEL, - "fake", "fake", "fake", + base_instance fake(base_subject::hash, TABLE, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", coor, table_min_range, table_min_range, min_range, min_range, min_range); @@ -437,7 +454,8 @@ namespace andromeda std::numeric_limits::max(), std::numeric_limits::max()}; - base_instance fake(base_subject::hash, NULL_MODEL, "fake", "fake", "fake", + base_instance fake(base_subject::hash, TABLE, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", coor, table_max_range, table_max_range, max_range, max_range, max_range); @@ -452,7 +470,7 @@ namespace andromeda grid.push_back({}); for(uint64_t j=0; j0? (provs.at(0)->get_item_ref()):"#"); } - bool is_valid() { return (base_subject::valid and text_element::text_valid); } + bool is_valid() { return (base_subject::valid and text_element::is_text_valid()); } virtual nlohmann::json to_json(const std::set& filters); @@ -148,13 +147,13 @@ namespace andromeda //LOG_S(INFO) << " -> subject::dhash = '" << dhash << "'"; //LOG_S(INFO) << " -> subject::text_hash = '" << text_element::text_hash << "'"; - std::vector hashes={dhash, text_element::text_hash}; + std::vector hashes={dhash, text_element::get_text_hash()}; base_subject::hash = utils::to_hash(hashes); //LOG_S(INFO) << " -> base_subject::hash = " << base_subject::hash; //LOG_S(INFO) << " -> subject::hash = " << subject::hash; - return text_element::text_valid; + return text_element::is_text_valid(); } bool subject::set_data(const nlohmann::json& item) @@ -207,7 +206,8 @@ namespace andromeda typename std::vector::iterator subject::insts_beg(std::array char_rng) { - base_instance fake(base_subject::hash, NULL_MODEL, "fake", "fake", "fake", + base_instance fake(base_subject::hash, TEXT, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", char_rng, {0,0}, {0,0}); return std::lower_bound(instances.begin(), instances.end(), fake); @@ -215,19 +215,21 @@ namespace andromeda typename std::vector::iterator subject::insts_end(std::array char_rng) { - base_instance fake(base_subject::hash, NULL_MODEL, "fake", "fake", "fake", + base_instance fake(base_subject::hash, TEXT, get_self_ref(), + NULL_MODEL, "fake", "fake", "fake", char_rng, {0,0}, {0,0}); return std::upper_bound(instances.begin(), instances.end(), fake); } - bool subject::get_property_label(const std::string name, std::string& label) + bool subject::get_property_label(const std::string model_name, std::string& label) { for(auto& prop:properties) { - if(name==prop.get_type()) + //if(name==prop.get_type()) + if(prop.is_type(model_name)) { - label = prop.get_name(); + label = prop.get_label(); return true; } } @@ -246,25 +248,25 @@ namespace andromeda for(auto& inst:instances) { - inst.ctok_range = text_element::get_char_token_range(inst.char_range); - inst.wtok_range = text_element::get_word_token_range(inst.char_range); + inst.set_ctok_range(text_element::get_char_token_range(inst.get_char_range())); + inst.set_wtok_range(text_element::get_word_token_range(inst.get_char_range())); inst.verify_wtok_range_match(word_tokens); } } - void subject::contract_wtokens_from_instances(model_name name) + void subject::contract_wtokens_from_instances(model_name model) { std::vector candidates={}; for(auto& inst:instances) { - if(inst.model_type==name and - inst.wtok_range[0] constants::special_words={"''"}; - + const std::set constants::abbreviations={"e.g.", "i.e.", "et al.", "etc."}; } diff --git a/src/andromeda/tooling/structs/tokens/word_token.h b/src/andromeda/tooling/structs/tokens/word_token.h index 67a9794a..5b93235a 100644 --- a/src/andromeda/tooling/structs/tokens/word_token.h +++ b/src/andromeda/tooling/structs/tokens/word_token.h @@ -79,13 +79,13 @@ namespace andromeda hash_type get_hash() const { return hash; } - range_type get_rng() { return rng; }; - index_type get_rng(index_type l) { return rng[l]; }; + range_type get_rng() const { return rng; }; + index_type get_rng(index_type l) const { return rng[l]; }; std::string get_word() const { return word; } - std::string get_orig(const std::string& text) { return text.substr(rng[0], rng[1]-rng[0]); } + std::string get_orig(const std::string& text) const { return text.substr(rng[0], rng[1]-rng[0]); } - std::string get_pos() { return pos; } // part-of-speech + std::string get_pos() const { return pos; } // part-of-speech std::set get_tags() const { return tags; } // tags void set_word(std::string word); @@ -93,7 +93,7 @@ namespace andromeda void set_tag(std::string tag); void set_known(bool known); - bool has_tag(std::string tag); + bool has_tag(std::string tag) const; bool is_known(); word_token get_word_token(); @@ -252,7 +252,7 @@ namespace andromeda this->tags.insert(tag); } - bool word_token::has_tag(std::string tag) + bool word_token::has_tag(std::string tag) const { return ((this->tags.count(tag))>0); } diff --git a/src/andromeda/utils/string/utils.h b/src/andromeda/utils/string/utils.h index d2a37375..9e6d6e66 100644 --- a/src/andromeda/utils/string/utils.h +++ b/src/andromeda/utils/string/utils.h @@ -47,7 +47,8 @@ namespace andromeda template value_type round_conf(value_type conf) { - auto v = std::ceil(100.0*conf)/100.0; + //auto v = std::ceil(100.0*conf)/100.0; + auto v = std::round(100.0*conf)/100.0; if(v>1.0) { @@ -56,6 +57,7 @@ namespace andromeda return v; } + std::string replace(std::string text, std::string word_0, std::string word_1) { @@ -395,6 +397,123 @@ namespace andromeda return (itr-headers.begin()); } + + template + std::string create_path(std::string path_0, ind_type ind_0) + { + std::stringstream ss_0; + ss_0 << std::setw(6) << std::setfill('0') << ind_0; + + std::stringstream ss; + ss << "#" << "/" << path_0 << "/" << ss_0.str(); + + return ss.str(); + } + + template + std::string create_path(std::string dloc, std::string path_0, ind_type ind_0) + { + std::stringstream ss_0; + ss_0 << std::setw(6) << std::setfill('0') << ind_0; + + std::stringstream ss; + ss << dloc << "#" << "/" << path_0 << "/" << ss_0.str(); + + return ss.str(); + } + + template + std::string create_path(std::string path_0, ind_type ind_0, + std::string path_1, ind_type ind_1) + { + std::stringstream ss_0; + ss_0 << std::setw(6) << std::setfill('0') << ind_0; + + std::stringstream ss_1; + ss_1 << std::setw(6) << std::setfill('0') << ind_1; + + std::stringstream ss; + ss << "#" << "/" + << path_0 << "/" << ss_0.str() << "/" + << path_1 << "/" << ss_1.str(); + + return ss.str(); + } + + bool compare_paths(const std::string& lhs, + const std::string& rhs) + { + auto lhs_parts = utils::split(lhs, "/"); + auto rhs_parts = utils::split(rhs, "/"); + + if(lhs_parts.size()==rhs_parts.size()) + { + std::size_t N = lhs_parts.size(); + + if(N<=2 and lhs_parts.at(1)!=rhs_parts.at(1)) + { + return (lhs_parts.at(1)=3 and + rhs_parts.size()>=3) + { + std::size_t Nlhs = lhs_parts.size(); + std::size_t Nrhs = rhs_parts.size(); + + if(Nlhs<=2 and Nrhs<=2 and lhs_parts.at(1)!=rhs_parts.at(1)) + { + return (lhs_parts.at(1) before year 2010.", + "Publications of before year 2010." ], [ - "numval", - "ival", - 18391264192891079539, + "reference", + "title", + 14190244699299580163, "TEXT", - "#/texts/137", + "#/texts/21", 1.0, - 15441160910541481791, - 3518619573290839093, - 18446744073709551615, - 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 5938585443202988569, + 13176291235909531708, + null, + null, + 3, + 39, + 3, + 39, + 2, + 9, true, - "23", - "23" + "Publications of before year", + "Publications of before year" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 14190244699299580163, "TEXT", - "#/texts/137", + "#/texts/21", 1.0, - 15441160910541481543, - 3518617976696906498, - 18446744073709551615, - 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 7049010920607555536, + 14574446082424609224, + null, + null, + 3, + 15, + 3, + 15, + 2, + 3, true, - "08", - "08" + "Publications", + "Publications" ], [ - "link", - "url", - 18391264192891079539, + "expression", + "wtoken-concatenation", + 14190244699299580163, "TEXT", - "#/texts/137", + "#/texts/21", 1.0, - 8536069645534292969, - 16063604623463467342, - 18446744073709551615, - 18446744073709551615, + 14650948670182226136, + 16086954502817773001, + null, + null, + 19, + 27, + 19, + 27, + 4, + 7, + true, + "", + "" + ], + [ + "term", + "single-term", + 14190244699299580163, + "TEXT", + "#/texts/21", + 1.0, + 14650948670182226136, + 16086954502817773001, + null, + null, + 19, + 27, + 19, + 27, + 4, + 7, + true, + "", + "" + ], + [ + "term", + "single-term", + 14190244699299580163, + "TEXT", + "#/texts/21", + 1.0, + 389609625740550397, + 16512324461665891687, + null, + null, 35, - 87, + 39, 35, - 87, + 39, 8, - 25, + 9, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "year", + "year" ], [ - "link", - "url", - 18391264192891079539, + "reference", + "date", + 14190244699299580163, "TEXT", - "#/texts/137", + "#/texts/21", 1.0, - 594099663775968682, - 14698211805947073928, - 18446744073709551615, - 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 389609625548777062, + 16322066304153845813, + null, + null, + 40, + 45, + 40, + 45, + 9, + 11, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "2010", + "2010." ], [ - "link", - "doi", - 18391264192891079539, + "sentence", + "improper", + 1376279050886549305, "TEXT", - "#/texts/137", + "#/texts/22", 1.0, - 1697220653346092555, - 8458710314769009562, - 18446744073709551615, - 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 15441160910541486849, + 3139716774804028048, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "c.", + "c." ], [ - "numval", - "ival", - 1080447728722590402, + "reference", + "authors", + 1376279050886549305, "TEXT", - "#/texts/138", + "#/texts/22", 1.0, - 15441160910541481977, - 12490742773547210041, - 18446744073709551615, - 18446744073709551615, + 17767354399704235211, + 4876440209134886407, + null, + null, 0, 2, 0, 2, 0, - 1, + 2, true, - "13", - "13" + "c", + "c." ], [ - "numval", - "ival", - 4361549257087816853, + "sentence", + "proper", + 1376279050886549305, "TEXT", - "#/texts/139", + "#/texts/22", 1.0, - 15441160910541481979, - 9983816787922721487, - 18446744073709551615, - 18446744073709551615, + 11828744795764754421, + 9031682916292278032, + null, + null, 3, - 5, + 29, 3, - 5, - 1, + 29, 2, + 8, true, - "15", - "15" + "Maps of the Permian basin.", + "Maps of the Permian basin." ], [ - "numval", - "ival", - 8207961846673301043, + "reference", + "title", + 1376279050886549305, "TEXT", - "#/texts/140", + "#/texts/22", 1.0, - 17767354399704235159, - 15458436803011088578, - 18446744073709551615, - 18446744073709551615, - 23, - 24, - 23, - 24, - 4, - 5, + 9449659440238098202, + 18118593112648843891, + null, + null, + 3, + 29, + 3, + 29, + 2, + 8, true, - "7", - "7" + "Maps of the Permian basin", + "Maps of the Permian basin." ], [ - "numval", - "fval", - 11998199584890640594, + "term", + "single-term", + 1376279050886549305, "TEXT", - "#/texts/141", + "#/texts/22", 1.0, - 14652250303396477617, - 6263954298368962822, - 18446744073709551615, - 18446744073709551615, - 457, - 465, - 457, - 465, - 94, - 95, + 389609625541180066, + 844236868687538702, + null, + null, + 3, + 7, + 3, + 7, + 2, + 3, true, - "0.75-0.9", - "0.75-0.9" + "Maps", + "Maps" ], [ - "numval", - "fval", - 11998199584890640594, + "term", + "single-term", + 1376279050886549305, "TEXT", - "#/texts/141", + "#/texts/22", 1.0, - 389609625535995626, - 11162238664629223042, - 18446744073709551615, - 18446744073709551615, - 631, - 635, - 629, - 633, - 132, - 133, + 13962245658001463579, + 14601050113340142397, + null, + null, + 15, + 28, + 15, + 28, + 5, + 7, true, - "0.97", - "0.97" + "Permian basin", + "Permian basin" ], [ - "numval", - "ival", - 11998199584890640594, + "sentence", + "improper", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 17767354399704235161, - 17845175019612967856, - 18446744073709551615, - 18446744073709551615, - 264, - 265, - 264, - 265, - 49, - 50, + 15441160910541487298, + 18395000339474183225, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, true, - "1", - "1" + "d.", + "d." ], [ - "numval", - "ival", - 11998199584890640594, + "sentence", + "proper", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 15441160910541482672, - 15292900460193668121, - 18446744073709551615, - 18446744073709551615, - 282, - 284, - 282, - 284, - 55, - 56, + 11529297519432858487, + 4227353319390710547, + null, + null, + 3, + 112, + 3, + 112, + 2, + 20, true, - "-1", - "-1" + "Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", + "Geological formations from the Miocene age with their depth, thickness, geographic location, and composition." ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 15441160910541482673, - 15292900459317583926, - 18446744073709551615, - 18446744073709551615, - 289, - 291, - 289, - 291, - 58, - 59, + 11536091645160224997, + 11675986273674768837, + null, + null, + 3, + 24, + 3, + 24, + 2, + 4, true, - "-2", - "-2" + "Geological formations", + "Geological formations" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 15441160910541482674, - 15292900461018240016, - 18446744073709551615, - 18446744073709551615, - 296, - 298, - 296, - 298, - 61, - 62, + 13913749731470667949, + 3850638292102934182, + null, + null, + 34, + 45, + 34, + 45, + 6, + 8, true, - "-3", - "-3" + "Miocene age", + "Miocene age" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 15441160910541482676, - 15292900461174373895, - 18446744073709551615, - 18446744073709551615, - 307, - 309, - 307, - 309, - 65, - 66, + 329104162100250438, + 13610267414365582951, + null, + null, + 57, + 62, + 57, + 62, + 10, + 11, true, - "-5", - "-5" + "depth", + "depth" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 15441160910541482672, - 15292900460193644573, - 18446744073709551615, - 18446744073709551615, - 426, - 428, - 426, - 428, - 87, - 88, + 3504050857170707483, + 596059642443336109, + null, + null, + 64, + 73, + 64, + 73, + 12, + 13, true, - "-1", - "-1" + "thickness", + "thickness" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 17767354399704235163, - 17845175019597634812, - 18446744073709551615, - 18446744073709551615, - 484, - 485, - 484, - 485, - 99, - 100, + 10848824456461591623, + 7386525518787609810, + null, + null, + 75, + 94, + 75, + 94, + 14, + 16, true, - "3", - "3" + "geographic location", + "geographic location" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "enum-term-mark-2", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 17767354399704235156, - 17845175019331480896, - 18446744073709551615, - 18446744073709551615, - 489, - 490, - 489, - 490, - 101, - 102, + 6404388065355556380, + 1414583348578000819, + null, + null, + 86, + 111, + 86, + 111, + 15, + 19, true, - "4", - "4" + "location, and composition", + "location, and composition" ], [ - "numval", - "ival", - 11998199584890640594, + "term", + "single-term", + 10155628801693924200, "TEXT", - "#/texts/141", + "#/texts/23", 1.0, - 15441160910541482676, - 15292900461174286862, - 18446744073709551615, - 18446744073709551615, - 601, - 603, - 601, - 603, - 125, - 126, + 14749101077007455096, + 6683642016798435769, + null, + null, + 100, + 111, + 100, + 111, + 18, + 19, true, - "-5", - "-5" + "composition", + "composition" ], [ - "numval", - "ival", - 16446129547721407877, + "sentence", + "improper", + 9107499507097280105, "TEXT", - "#/texts/142", + "#/texts/24", 1.0, - 17767354399704235158, - 11362596522813034737, - 18446744073709551615, - 18446744073709551615, + 15441160910541487235, + 11864515451990234441, + null, + null, 0, - 1, + 2, 0, - 1, + 2, 0, - 1, + 2, true, - "6", - "6" + "e.", + "e." ], [ - "numval", - "ival", - 6720443978031524294, + "sentence", + "proper", + 9107499507097280105, "TEXT", - "#/texts/143", + "#/texts/24", 1.0, - 17767354399704235161, - 16606870843966802051, - 18446744073709551615, - 18446744073709551615, - 521, - 522, - 521, - 522, - 82, - 83, + 1974328525313479394, + 15740602897253173811, + null, + null, + 3, + 94, + 3, + 94, + 2, + 16, true, - "1", - "1" + "List all high-Tc superconductors with their known crystallographic and material properties?", + "List all high-Tc superconductors with their known crystallographic and material properties?" ], [ - "numval", - "ival", - 6720443978031524294, + "term", + "single-term", + 9107499507097280105, "TEXT", - "#/texts/143", + "#/texts/24", 1.0, - 17767354399704235162, - 16606870838110795262, - 18446744073709551615, - 18446744073709551615, - 579, - 580, - 579, - 580, - 95, - 96, + 389609625527096807, + 17993706797399040827, + null, + null, + 3, + 7, + 3, + 7, + 2, + 3, true, - "2", - "2" + "List", + "List" ], [ - "numval", - "year", - 18391264192891079539, + "expression", + "word-concatenation", + 9107499507097280105, "TEXT", - "#/texts/144", + "#/texts/24", 1.0, - 389609625548777262, - 8826555294676663632, - 18446744073709551615, - 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 8106397471578324091, + 17544472309867440760, + null, + null, + 12, + 19, + 12, + 19, + 4, + 7, true, - "2020", - "2020" + "high-Tc", + "high-Tc" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 9107499507097280105, "TEXT", - "#/texts/144", + "#/texts/24", 1.0, - 389609625548777251, - 8826555296349648778, - 18446744073709551615, - 18446744073709551615, - 119, - 123, - 119, - 123, - 34, + 40139023213095902, + 5870111245743199103, + null, + null, + 17, + 35, + 17, 35, + 6, + 8, true, - "2023", - "2023" + "Tc superconductors", + "Tc superconductors" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 9107499507097280105, "TEXT", - "#/texts/144", + "#/texts/24", 1.0, - 8104408072666212335, - 13552219042525319352, - 18446744073709551615, - 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 3841511266640975261, + 8141116605713377189, + null, + null, + 74, + 93, + 74, + 93, + 13, + 15, true, - "10.1002", - "10.1002" + "material properties", + "material properties" ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "proper", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 389609625548868096, - 8826558551385119058, - 18446744073709551615, - 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 11815587436253641919, + 9694283959050279543, + null, + null, + 0, + 163, + 0, + 163, + 0, + 33, true, - "2.20", - "2.20" + "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions).", + "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions)." ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 14654386914267794441, - 12796143052106760105, - 18446744073709551615, - 18446744073709551615, + 14650942982668217094, + 17302408507521948522, + null, + null, 0, 8, 0, @@ -7926,1154 +8226,1217 @@ 0, 1, true, - "26895595", - "26895595" + "Question", + "Question" ], [ - "numval", - "ival", - 18391264192891079539, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 17767354399704235162, - 7753390158484899261, - 18446744073709551615, - 18446744073709551615, - 16, - 17, - 16, - 17, + 12178341415896394054, + 11564909962300040492, + null, + null, + 9, + 12, + 9, + 12, + 1, 4, - 5, true, - "2", - "2" + "(a)", + "(a)" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 15441160910541481791, - 3518619573290839093, - 18446744073709551615, - 18446744073709551615, - 113, - 115, - 113, - 115, - 30, - 31, + 1081977986607740386, + 13369606897380109283, + null, + null, + 34, + 57, + 34, + 57, + 7, + 10, true, - "23", - "23" + "classic search paradigm", + "classic search paradigm" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 15441160910541481543, - 3518617976696906498, - 18446744073709551615, - 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 4504082466399500918, + 14073774627107365452, + null, + null, + 87, + 100, + 87, + 100, + 17, + 19, true, - "08", - "08" + "search engine", + "search engine" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 8536069645534292969, - 16063604623463467342, - 18446744073709551615, - 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 12002758730476261783, + 6168539106973887837, + null, + null, + 111, + 125, + 111, + 125, + 22, + 24, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "number sources", + "number sources" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 594099663775968682, - 14698211805947073928, - 18446744073709551615, - 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 16604364587013013096, + 5886022793160196344, + null, + null, + 131, + 144, + 131, + 144, + 25, + 27, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "exact answers", + "exact answers" ], [ - "link", - "doi", - 18391264192891079539, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/144", + "#/texts/25", 1.0, - 1697220653346092555, - 8458710314769009562, - 18446744073709551615, - 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 12379829975541768606, + 13865488235026578313, + null, + null, + 145, + 162, + 145, + 162, + 27, + 32, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "(ie, definitions)", + "(ie, definitions)" ], [ - "numval", - "ival", - 2144926730621142072, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/145", + "#/texts/25", 1.0, - 15441160910541481978, - 18064563043183731132, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 15441160910541486545, + 12227689146572455673, + null, + null, + 146, + 148, + 146, + 148, + 28, + 29, true, - "14", - "14" + "ie", + "ie" ], [ - "numval", - "ival", - 2144926730621142072, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/145", + "#/texts/25", 1.0, - 15441160910541481979, - 18064563042796865823, - 18446744073709551615, - 18446744073709551615, - 4, - 6, - 4, - 6, - 2, - 3, + 1536294900910083314, + 8031917503160766119, + null, + null, + 150, + 161, + 150, + 161, + 30, + 31, true, - "15", - "15" + "definitions", + "definitions" ], [ - "numval", - "ival", - 14222671032550229818, + "sentence", + "proper", + 7248467870339433322, "TEXT", - "#/texts/146", + "#/texts/25", 1.0, - 17767354399704235163, - 2699991593779864855, - 18446744073709551615, - 18446744073709551615, - 24, - 25, - 24, - 25, - 6, - 7, + 414059460071051178, + 9537463599014151627, + null, + null, + 164, + 271, + 164, + 271, + 33, + 53, true, - "3", - "3" + "Likewise, question (b) can be easily answered through metadata based filter rules on a literature database.", + "Likewise, question (b) can be easily answered through metadata based filter rules on a literature database." ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 10302035827600178331, - 6710097973531677104, - 18446744073709551615, - 18446744073709551615, - 36, - 45, + 14637920976934857672, + 10531673380158100191, + null, + null, + 174, + 182, + 174, + 182, + 35, 36, - 45, - 14, - 15, true, - "0000-0002", - "0000-0002" + "question", + "question" ], [ - "numval", - "irng", - 3523281823889115814, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 6624857390961351666, - 3541555616013892515, - 18446744073709551615, - 18446744073709551615, - 46, - 55, - 46, - 55, - 16, - 17, + 12178341415896394119, + 11564909936483728813, + null, + null, + 183, + 186, + 183, + 186, + 36, + 39, true, - "8088-0823", - "8088-0823" + "(b)", + "(b)" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 10302035827600178332, - 6710097973532471075, - 18446744073709551615, - 18446744073709551615, - 88, - 97, - 88, - 97, - 27, - 28, + 14638347573453462708, + 13381141265012229755, + null, + null, + 218, + 226, + 218, + 226, + 44, + 45, true, - "0000-0001", - "0000-0001" + "metadata", + "metadata" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 6560223242063427106, - 13609528576140932418, - 18446744073709551615, - 18446744073709551615, - 98, - 107, - 98, - 107, - 29, - 30, + 11809545502212496257, + 8478007624491184080, + null, + null, + 233, + 245, + 233, + 245, + 46, + 48, true, - "7216-8505", - "7216-8505" + "filter rules", + "filter rules" ], [ - "numval", - "irng", - 3523281823889115814, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 10302035827600178332, - 6710097973532498930, - 18446744073709551615, - 18446744073709551615, - 141, - 150, - 141, - 150, - 40, - 41, + 15951062515149504329, + 16980500711213785871, + null, + null, + 251, + 270, + 251, + 270, + 50, + 52, true, - "0000-0001", - "0000-0001" + "literature database", + "literature database" ], [ - "numval", - "irng", - 3523281823889115814, + "sentence", + "proper", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 6573923715856392023, - 13497670743408223376, - 18446744073709551615, - 18446744073709551615, - 151, - 160, - 151, - 160, - 42, - 43, + 4249137505398623360, + 15817638664358866975, + null, + null, + 272, + 486, + 272, + 486, + 53, + 94, true, - "5761-0422", - "5761-0422" + "Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done.", + "Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done." ], [ - "link", - "url", - 3523281823889115814, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 7086030415698247677, - 10516035679311822965, - 18446744073709551615, - 18446744073709551615, - 18, - 55, - 18, - 55, - 6, - 17, + 14650942982668217094, + 17302408507521932942, + null, + null, + 272, + 280, + 272, + 280, + 53, + 54, true, - "https://orcid.org/0000-0002-8088-0823", - "https://orcid.org/0000-0002-8088-0823" + "Question", + "Question" ], [ - "link", - "url", - 3523281823889115814, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 2033258390552333901, - 14596379607593903375, - 18446744073709551615, - 18446744073709551615, - 70, - 107, - 70, - 107, - 19, - 30, + 12178341415896393924, + 11564909955844336362, + null, + null, + 281, + 284, + 281, + 284, + 54, + 57, true, - "https://orcid.org/0000-0001-7216-8505", - "https://orcid.org/0000-0001-7216-8505" + "(c)", + "(c)" ], [ - "link", - "url", - 3523281823889115814, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/152", + "#/texts/25", 1.0, - 2031879929749239141, - 13323569836539834175, - 18446744073709551615, - 18446744073709551615, - 123, - 160, - 123, - 160, - 32, - 43, + 16381206569053819062, + 10460140271249301149, + null, + null, + 307, + 313, + 307, + 313, + 60, + 61, true, - "https://orcid.org/0000-0001-5761-0422", - "https://orcid.org/0000-0001-5761-0422" + "extent", + "extent" ], [ - "link", - "url", - 7813503946963688644, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/154", + "#/texts/25", 1.0, - 3527101060180289873, - 4288347075719597580, - 18446744073709551615, - 18446744073709551615, - 30, - 52, - 30, - 52, - 6, - 15, + 5329435588693387761, + 15530791178865888084, + null, + null, + 317, + 333, + 317, + 333, + 62, + 64, true, - "https://www.elastic.co", - "https://www.elastic.co" + "domain knowledge", + "domain knowledge" ], [ - "link", - "url", - 7813503946963688644, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/154", + "#/texts/25", 1.0, - 7381438071617048818, - 3762754436696500331, - 18446744073709551615, - 18446744073709551615, - 72, - 97, - 72, - 97, - 19, - 28, + 329104161610777240, + 9405584604279882191, + null, + null, + 353, + 358, + 353, + 358, + 69, + 70, true, - "https://lucene.apache.org", - "https://lucene.apache.org" + "model", + "model" ], [ - "link", - "url", - 7813503946963688644, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/154", + "#/texts/25", 1.0, - 7699234159584878934, - 8720273332387288393, - 18446744073709551615, - 18446744073709551615, - 38, - 52, - 38, - 52, - 10, - 15, + 6165970819764784401, + 80764262191222596, + null, + null, + 386, + 395, + 386, + 395, + 74, + 75, true, - "www.elastic.co", - "www.elastic.co" + "relevance", + "relevance" ], [ - "link", - "url", - 1997735398126013155, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/156", + "#/texts/25", 1.0, - 11080755855567888942, - 12138756017738546093, - 18446744073709551615, - 18446744073709551615, - 4, - 24, - 2, - 22, - 1, - 10, + 389609625618383420, + 8585944591956156333, + null, + null, + 409, + 413, + 409, + 413, + 78, + 79, true, - "https://www.nltk.org", - "https://www.nltk.org" + "maps", + "maps" ], [ - "link", - "url", - 1997735398126013155, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/156", + "#/texts/25", 1.0, - 7030452472279930374, - 3139262024232962844, - 18446744073709551615, - 18446744073709551615, - 12, - 24, - 10, - 22, - 5, - 10, + 329104158730975457, + 12415524210016868054, + null, + null, + 421, + 426, + 421, + 426, + 81, + 82, true, - "www.nltk.org", - "www.nltk.org" + "query", + "query" ], [ - "numval", - "ival", - 4925537010788978399, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/158", + "#/texts/25", 1.0, - 17767354399704235161, - 13902073100028876379, - 18446744073709551615, - 18446744073709551615, - 148, - 149, - 147, - 148, - 29, - 30, + 11130817838525238749, + 11909014372498478623, + null, + null, + 449, + 471, + 449, + 471, + 87, + 90, true, - "1", - "1" + "manual curation effort", + "manual curation effort" ], [ - "numval", - "ival", - 16552665876195410077, + "sentence", + "proper", + 7248467870339433322, "TEXT", - "#/texts/159", + "#/texts/25", 1.0, - 17767354399704235156, - 1305421191768306174, - 18446744073709551615, - 18446744073709551615, - 18, - 19, - 18, - 19, - 4, - 5, + 16737319616048446271, + 14108564952457254186, + null, + null, + 487, + 674, + 487, + 674, + 94, + 129, true, - "4", - "4" + "Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source.", + "Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source." ], [ - "numval", - "year", - 17579390613842440572, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/160", + "#/texts/25", 1.0, - 389609625548777059, - 14748978429801291102, - 18446744073709551615, - 18446744073709551615, - 178, - 182, - 174, - 178, - 52, - 53, + 2906549781684343771, + 1487082764881216534, + null, + null, + 487, + 496, + 487, + 496, + 94, + 95, true, - "2015", - "2015" + "Questions", + "Questions" ], [ - "numval", - "ival", - 17579390613842440572, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/160", + "#/texts/25", 1.0, - 17767354399704235163, - 14663762662264921246, - 18446744073709551615, - 18446744073709551615, - 73, - 74, - 69, - 70, - 15, - 16, + 12178341415896393989, + 11564909965731230263, + null, + null, + 497, + 500, + 497, + 500, + 95, + 98, true, - "3", - "3" + "(d)", + "(d)" ], [ - "numval", - "ival", - 17579390613842440572, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/160", + "#/texts/25", 1.0, - 17767354399704235156, - 14663762663007797994, - 18446744073709551615, - 18446744073709551615, - 136, - 137, - 132, - 133, - 35, - 36, + 12178341415896394307, + 11564909949268562290, + null, + null, + 505, + 508, + 505, + 508, + 99, + 102, true, - "4", - "4" + "(e)", + "(e)" ], [ - "numval", - "ival", - 17579390613842440572, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/160", + "#/texts/25", 1.0, - 15441160910541481913, - 12659057306413090614, - 18446744073709551615, - 18446744073709551615, - 183, - 185, - 179, - 181, - 54, - 55, + 14669513449876101491, + 3515293922915694043, + null, + null, + 527, + 545, + 527, + 545, + 104, + 106, true, - "02", - "02" + "query capabilities", + "query capabilities" ], [ - "numval", - "ival", - 17579390613842440572, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/160", + "#/texts/25", 1.0, - 17767354399704235156, - 14663762663007808920, - 18446744073709551615, - 18446744073709551615, - 189, - 190, - 185, - 186, - 57, - 58, + 3392901146434670347, + 1719180218007220136, + null, + null, + 594, + 609, + 594, + 609, + 113, + 115, true, - "4", - "4" + "manual curation", + "manual curation" ], [ - "link", - "url", - 17579390613842440572, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/160", + "#/texts/25", 1.0, - 3438649888016089446, - 14315872303660489441, - 18446744073709551615, - 18446744073709551615, - 65, - 127, - 61, - 123, - 10, - 32, + 3099738386292325982, + 1290161288237999950, + null, + null, + 655, + 673, + 655, + 673, + 125, + 128, true, - "http://s3.thinkaurelius.com/docs/titan/current/data-model.html", - "http://s3.thinkaurelius.com/docs/titan/current/data-model.html" + "single data source", + "single data source" ], [ - "link", - "url", - 17579390613842440572, + "sentence", + "proper", + 7248467870339433322, "TEXT", - "#/texts/160", + "#/texts/25", 1.0, - 9361941850829391161, - 1324878578738734655, - 18446744073709551615, - 18446744073709551615, - 140, - 209, - 136, - 205, - 38, - 63, + 7228458266121330253, + 7626297788632302031, + null, + null, + 675, + 869, + 675, + 869, + 129, + 164, true, - "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html", - "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html" + "These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", + "These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties)." ], [ - "numval", - "ival", - 722212543953276862, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/161", + "#/texts/25", 1.0, - 17767354399704235156, - 17688058591094674309, - 18446744073709551615, - 18446744073709551615, - 19, - 20, - 15, - 16, - 4, - 5, + 6168848426972573469, + 13229835019663629347, + null, + null, + 681, + 690, + 681, + 690, + 130, + 131, true, - "4", - "4" + "questions", + "questions" ], [ - "link", - "url", - 722212543953276862, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/161", + "#/texts/25", 1.0, - 12568677210829628871, - 1680746501251640588, - 18446744073709551615, - 18446744073709551615, - 105, - 139, - 101, - 135, - 21, - 35, + 16381206550376895780, + 9515784264781594172, + null, + null, + 703, + 709, + 703, + 709, + 133, + 134, true, - "https://db-engines.com/en/ranking_", - "https://db-engines.com/en/ranking_" + "system", + "system" ], [ - "numval", - "ival", - 11085577343317113173, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/162", + "#/texts/25", 1.0, - 12178341415896310600, - 9970685264370540412, - 18446744073709551615, - 18446744073709551615, - 17, - 20, - 15, - 18, - 6, - 7, + 528741001868643171, + 9976817434875577411, + null, + null, + 727, + 749, + 727, + 749, + 138, + 141, true, - "500", - "500" + "complex data structure", + "complex data structure" ], [ - "link", - "url", - 11085577343317113173, + "parenthesis", + "round brackets", + 7248467870339433322, "TEXT", - "#/texts/162", + "#/texts/25", 1.0, - 1244385257359010144, - 3127203609822040452, - 18446744073709551615, - 18446744073709551615, - 5, - 25, - 3, - 23, - 1, - 10, + 7548253312880200059, + 14338346281668154436, + null, + null, + 750, + 868, + 750, + 868, + 141, + 163, true, - "http://graph500.org/", - "http://graph500.org/" + "(eg, a table in which the rows list the formations or materials while the columns contain their respective properties)", + "(eg, a table in which the rows list the formations or materials while the columns contain their respective properties)" ], [ - "reference", - "url", - 1792096630133661292, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/163", + "#/texts/25", 1.0, - 16747146533825186967, - 2165348395015827092, - 18446744073709551615, - 18446744073709551615, - 0, - 54, - 0, - 52, - 0, - 18, + 15441160910541487324, + 12227689149305300000, + null, + null, + 751, + 753, + 751, + 753, + 142, + 143, true, - "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", - "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html" + "eg", + "eg" ], [ - "reference", - "url", - 16611805225457383637, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/165", + "#/texts/25", 1.0, - 4512570954370983408, - 11763158631698282386, - 18446744073709551615, - 18446744073709551615, - 0, - 75, - 0, - 69, - 0, - 23, + 329104159216638303, + 12465464266404107462, + null, + null, + 757, + 762, + 757, + 762, + 145, + 146, true, - "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", - "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/" + "table", + "table" ], [ - "reference", - "url", - 1531505125666754945, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/166", + "#/texts/25", 1.0, - 16922240937803157180, - 3329452043224775053, - 18446744073709551615, - 18446744073709551615, - 0, - 43, - 0, - 37, - 0, - 11, + 389609625632815211, + 8556557572644543816, + null, + null, + 776, + 780, + 776, + 780, + 149, + 150, true, - "\u2021\u2021\u2021 https://www.naturalearthdata.com/", - "\u2021\u2021\u2021 https://www.naturalearthdata.com/" + "rows", + "rows" ], [ - "reference", - "url", - 15684389308320953629, + "term", + "enum-term-mark-3", + 7248467870339433322, "TEXT", - "#/texts/167", + "#/texts/25", 1.0, - 2845896203864732456, - 4760469342904968768, - 18446744073709551615, - 18446744073709551615, - 0, - 36, - 0, - 33, - 0, - 11, + 8705710812738155139, + 4173932638461788376, + null, + null, + 790, + 813, + 790, + 813, + 152, + 155, true, - "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", - "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/" + "formations or materials", + "formations or materials" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/169", + "#/texts/25", 1.0, - 11879540473470058199, - 12427853451193245392, - 18446744073709551615, - 18446744073709551615, - 3, - 17, - 3, - 17, - 2, - 5, + 16064217528453934834, + 17904668929606079942, + null, + null, + 790, + 800, + 790, + 800, + 152, + 153, true, - "Staar Peter WJ", - "Staar Peter WJ" + "formations", + "formations" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/169", + "#/texts/25", 1.0, - 6613162031266505134, - 16138057201536909006, - 18446744073709551615, - 18446744073709551615, - 19, - 28, - 19, - 28, - 6, - 8, + 6179392753523812130, + 4193644628432114698, + null, + null, + 804, + 813, + 804, + 813, + 154, + 155, true, - "Michele D", - "Michele D" + "materials", + "materials" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/169", + "#/texts/25", 1.0, - 4457167794784606628, - 16487730286724222122, - 18446744073709551615, - 18446744073709551615, - 30, - 41, - 30, - 41, - 9, - 11, + 8106398484785590092, + 4216933149211892873, + null, + null, + 824, + 831, + 824, + 831, + 157, + 158, true, - "Christoph A", - "Christoph A" + "columns", + "columns" ], [ - "reference", - "author", - 10480452763767134455, + "term", + "single-term", + 7248467870339433322, "TEXT", - "#/texts/169", + "#/texts/25", 1.0, - 6560601913145533820, - 12701816617387729389, - 18446744073709551615, - 18446744073709551615, - 43, - 52, - 43, - 52, - 12, - 15, + 10514013392853408912, + 13055603857313609190, + null, + null, + 846, + 867, + 846, + 867, + 160, + 162, true, - "Costas B.", - "Costas B." + "respective properties", + "respective properties" ], [ - "reference", - "citation-number", - 10480452763767134455, + "sentence", + "improper", + 13346892078888080449, "TEXT", - "#/texts/169", + "#/texts/26", 1.0, - 17767354399704235161, - 16208788960124925205, - 18446744073709551615, - 18446744073709551615, + 9732050976592056956, + 7055672841020251338, + null, + null, 0, - 1, + 140, 0, - 1, + 140, 0, - 1, + 23, true, - "1", - "1" + "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", + "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:" ], [ - "reference", - "container-title", - 10480452763767134455, + "term", + "single-term", + 13346892078888080449, "TEXT", - "#/texts/169", + "#/texts/26", 1.0, - 8106351470704634736, - 17995829417296331915, - 18446744073709551615, - 18446744073709551615, - 138, - 145, - 138, - 145, - 29, - 32, + 16652112846725585848, + 6419752225232606784, + null, + null, + 20, + 34, + 20, + 34, + 3, + 5, true, - "KDD '18", - "KDD '18" + "above examples", + "above examples" ], [ - "reference", - "date", - 10480452763767134455, + "term", + "single-term", + 13346892078888080449, "TEXT", - "#/texts/169", + "#/texts/26", 1.0, - 8104408419226439021, - 7524634383995046949, - 18446744073709551615, - 18446744073709551615, - 164, - 171, - 164, - 171, - 39, - 42, + 14652282445985817695, + 8459337319831918928, + null, + null, + 71, + 79, + 71, + 79, + 11, + 12, true, - "; 2018:", - "; 2018:" + "criteria", + "criteria" ], [ - "reference", - "location", - 10480452763767134455, + "term", + "single-term", + 13346892078888080449, "TEXT", - "#/texts/169", + "#/texts/26", 1.0, - 6517026456739326224, - 8283202906327186871, - 18446744073709551615, - 18446744073709551615, - 147, - 160, - 147, - 160, - 33, - 38, + 16381206550376895780, + 3357521497593263256, + null, + null, + 86, + 92, + 86, + 92, + 14, + 15, true, - "New York, NY:", - "New York, NY:" + "system", + "system" ], [ - "reference", - "pages", - 10480452763767134455, + "term", + "single-term", + 13346892078888080449, "TEXT", - "#/texts/169", + "#/texts/26", 1.0, - 8104408789160133341, - 11698475954970405279, - 18446744073709551615, - 18446744073709551615, - 171, - 178, - 171, - 178, - 42, - 43, + 13671659409933113155, + 17581719947633067695, + null, + null, + 107, + 128, + 107, + 128, + 17, + 20, true, - "774-782", - "774-782" + "deep data exploration", + "deep data exploration" ], [ - "reference", - "publisher", - 10480452763767134455, + "term", + "single-term", + 13346892078888080449, "TEXT", - "#/texts/169", + "#/texts/26", 1.0, - 12178341415896228980, - 16661682738511655292, - 18446744073709551615, - 18446744073709551615, - 161, - 164, - 161, - 164, - 38, - 39, + 8106398483106473371, + 5210834614831011291, + null, + null, + 132, + 139, + 132, + 139, + 21, + 22, true, - "ACM", - "ACM" + "corpora", + "corpora" ], [ - "reference", - "title", - 10480452763767134455, + "numval", + "ival", + 1118972765223422660, "TEXT", - "#/texts/169", + "#/texts/27", 1.0, - 3346237141252876309, - 13011534883222988606, - 18446744073709551615, - 18446744073709551615, - 53, - 136, - 53, - 136, - 15, - 28, + 17767354399704235161, + 16395526852875690261, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, true, - "Corpus conversion service: a machine learning platform to ingest documents at scale", - "Corpus conversion service: a machine learning platform to ingest documents at scale" + "1", + "1" ], [ - "reference", - "author", - 11866471329779366855, + "sentence", + "improper", + 1118972765223422660, "TEXT", - "#/texts/170", + "#/texts/27", 1.0, - 11879540473470058199, - 6818801233014041471, - 18446744073709551615, - 18446744073709551615, - 3, - 17, - 3, - 17, + 17767354399704235166, + 16395526851487480286, + null, + null, + 1, + 2, + 1, + 2, + 1, 2, - 5, true, - "Staar Peter WJ", - "Staar Peter WJ" + ".", + "." ], [ - "reference", - "author", - 11866471329779366855, + "sentence", + "proper", + 1118972765223422660, "TEXT", - "#/texts/170", + "#/texts/27", 1.0, - 329104159232588720, - 1186563503698797045, - 18446744073709551615, - 18446744073709551615, - 19, + 14523617476315776232, + 15368741834253149117, + null, + null, + 3, + 111, + 3, + 111, + 2, + 20, + true, + "It can answer queries by combining different data elements from different sources into a new data structure.", + "It can answer queries by combining different data elements from different sources into a new data structure." + ], + [ + "term", + "single-term", + 1118972765223422660, + "TEXT", + "#/texts/27", + 1.0, + 8106477782290185579, + 4397511644820592752, + null, + null, + 17, 24, - 19, + 17, 24, + 5, 6, - 8, true, - "Kl BP", - "Kl BP" + "queries", + "queries" ], [ - "reference", - "author", - 11866471329779366855, + "term", + "single-term", + 1118972765223422660, "TEXT", - "#/texts/170", + "#/texts/27", 1.0, - 14652187939873997159, - 718674333250886747, - 18446744073709551615, - 18446744073709551615, - 26, - 34, - 26, - 34, - 9, + 6804442699501962146, + 17203402041390290286, + null, + null, + 38, + 61, + 38, + 61, + 8, 11, true, - "Roxana I", - "Roxana I" + "different data elements", + "different data elements" ], [ - "reference", - "citation-number", - 11866471329779366855, + "term", + "single-term", + 1118972765223422660, "TEXT", - "#/texts/170", + "#/texts/27", + 1.0, + 600429551108811238, + 12839428366267894769, + null, + null, + 67, + 84, + 67, + 84, + 12, + 14, + true, + "different sources", + "different sources" + ], + [ + "term", + "single-term", + 1118972765223422660, + "TEXT", + "#/texts/27", + 1.0, + 12659510570308685827, + 16006829420770455202, + null, + null, + 92, + 110, + 92, + 110, + 16, + 19, + true, + "new data structure", + "new data structure" + ], + [ + "numval", + "ival", + 324023167304456371, + "TEXT", + "#/texts/28", 1.0, 17767354399704235162, - 7639029136784882071, - 18446744073709551615, - 18446744073709551615, + 964743056782930174, + null, + null, 0, 1, 0, @@ -9085,184 +9448,205 @@ "2" ], [ - "reference", - "date", - 11866471329779366855, + "sentence", + "improper", + 324023167304456371, "TEXT", - "#/texts/170", + "#/texts/28", 1.0, - 325347433255123998, - 9431696322833619113, - 18446744073709551615, - 18446744073709551615, - 150, - 162, - 150, - 162, - 36, - 39, + 17767354399704235166, + 964743056328104984, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, true, - "2016:812-821", - "2016:812-821" + ".", + "." ], [ - "reference", - "journal", - 11866471329779366855, + "sentence", + "proper", + 324023167304456371, "TEXT", - "#/texts/170", + "#/texts/28", 1.0, - 8106350741667376964, - 2037770047407614341, - 18446744073709551615, - 18446744073709551615, - 131, - 138, - 131, - 138, - 30, - 31, + 5902071177970408282, + 15210357510825208849, + null, + null, + 3, + 117, + 3, + 117, + 2, + 24, true, - "Chicago", - "Chicago" + "It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way.", + "It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way." ], [ + "parenthesis", "reference", - "publisher", - 11866471329779366855, + 324023167304456371, "TEXT", - "#/texts/170", + "#/texts/28", 1.0, - 329104161865740710, - 2100895836958644546, - 18446744073709551615, - 18446744073709551615, - 144, - 149, - 144, - 149, - 34, - 36, + 12178341415896395122, + 294993208777838466, + null, + null, + 15, + 18, + 15, + 18, + 4, + 7, true, - "IEEE;", - "IEEE;" + "(1)", + "(1)" ], [ - "reference", - "title", - 11866471329779366855, + "numval", + "ival", + 324023167304456371, "TEXT", - "#/texts/170", + "#/texts/28", 1.0, - 7105706713138331748, - 8882313339767931673, - 18446744073709551615, - 18446744073709551615, - 43, - 129, - 43, - 129, - 15, - 29, + 17767354399704235161, + 964743056733707724, + null, + null, + 16, + 17, + 16, + 17, + 5, + 6, true, - "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance", - "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance" + "1", + "1" ], [ - "reference", - "volume", - 11866471329779366855, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/170", + "#/texts/28", 1.0, - 12178341415896263665, - 6233863430018819825, - 18446744073709551615, - 18446744073709551615, - 140, - 143, - 140, - 143, - 32, - 34, + 11554018429271009560, + 14601176146139573086, + null, + null, + 33, + 48, + 33, + 48, + 10, + 12, true, - "IL:", - "IL:" + "knowledge model", + "knowledge model" ], [ - "reference", - "author", - 6016885898370676469, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/171", + "#/texts/28", 1.0, - 14650311461945683358, - 1978144735469983705, - 18446744073709551615, - 18446744073709551615, - 3, - 11, - 3, - 11, - 2, - 4, + 332950716206334614, + 14329605590209969858, + null, + null, + 68, + 87, + 68, + 87, + 16, + 18, true, - "Matteo M", - "Matteo M" + "unstructured corpus", + "unstructured corpus" ], [ - "reference", - "author", - 6016885898370676469, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/171", + "#/texts/28", 1.0, - 4457167794784606628, - 3737697229009384388, - 18446744073709551615, - 18446744073709551615, - 13, - 24, - 13, + 11553735866943991178, + 16918357182749210763, + null, + null, + 100, + 116, + 100, + 116, + 21, + 23, + true, + "unsupervised way", + "unsupervised way" + ], + [ + "sentence", + "proper", + 324023167304456371, + "TEXT", + "#/texts/28", + 1.0, + 18302146700320907131, + 18137989444182809109, + null, + null, + 118, + 180, + 118, + 180, 24, - 5, - 7, + 37, true, - "Christoph A", - "Christoph A" + "It may profit from, but not require any manually curated data.", + "It may profit from, but not require any manually curated data." ], [ - "reference", - "author", - 6016885898370676469, + "term", + "single-term", + 324023167304456371, "TEXT", - "#/texts/171", + "#/texts/28", 1.0, - 6183363009296336817, - 2886377010043332845, - 18446744073709551615, - 18446744073709551615, - 26, - 35, - 26, + 389609625696431489, + 17273072847800545799, + null, + null, + 175, + 179, + 175, + 179, 35, - 8, - 12, + 36, true, - "Val'ery W", - "Val'ery W" + "data", + "data" ], [ - "reference", - "citation-number", - 6016885898370676469, + "numval", + "ival", + 4651508276868765576, "TEXT", - "#/texts/171", + "#/texts/29", 1.0, 17767354399704235163, - 13510159049290326510, - 18446744073709551615, - 18446744073709551615, + 12716136939749916250, + null, + null, 0, 1, 0, @@ -9274,9144 +9658,75586 @@ "3" ], [ - "reference", - "date", - 6016885898370676469, + "sentence", + "improper", + 4651508276868765576, "TEXT", - "#/texts/171", + "#/texts/29", 1.0, - 16381206542172555288, - 10693536807570486686, - 18446744073709551615, - 18446744073709551615, - 161, - 167, - 161, - 167, - 35, - 37, + 17767354399704235166, + 12716136938048933799, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, true, - "; 2019", - "; 2019" + ".", + "." ], [ - "reference", - "journal", - 6016885898370676469, + "sentence", + "proper", + 4651508276868765576, "TEXT", - "#/texts/171", + "#/texts/29", 1.0, - 7543597897356589805, - 187532807533800461, - 18446744073709551615, - 18446744073709551615, - 141, - 151, - 141, - 151, - 28, - 32, + 5416144356132738083, + 6452265117056514365, + null, + null, + 3, + 82, + 3, + 82, + 2, + 19, true, - "ArXiv.abs/", - "ArXiv.abs/" + "It may restrict supported queries to a specific domain (eg, a technical field).", + "It may restrict supported queries to a specific domain (eg, a technical field)." ], [ - "reference", - "pages", - 6016885898370676469, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/171", + "#/texts/29", 1.0, - 329104147742543445, - 2092785390571099911, - 18446744073709551615, - 18446744073709551615, - 156, - 161, - 156, - 161, - 34, - 35, + 8106477782290185579, + 4528294841171204155, + null, + null, + 29, + 36, + 29, + 36, + 6, + 7, true, - "08400", - "08400" + "queries", + "queries" ], [ - "reference", - "title", - 6016885898370676469, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/171", + "#/texts/29", 1.0, - 14518759528420507379, - 35296972575901155, - 18446744073709551615, - 18446744073709551615, - 44, - 139, - 44, - 139, - 16, - 27, + 2648546400259159503, + 9428685315310219813, + null, + null, + 42, + 57, + 42, + 57, + 9, + 11, true, - "An information extraction and knowledge graph platform for accelerating biochemical discoveries", - "An information extraction and knowledge graph platform for accelerating biochemical discoveries" + "specific domain", + "specific domain" ], [ - "reference", - "volume", - 6016885898370676469, + "parenthesis", + "round brackets", + 4651508276868765576, "TEXT", - "#/texts/171", + "#/texts/29", 1.0, - 389609625536083120, - 2278168081688323653, - 18446744073709551615, - 18446744073709551615, - 151, - 155, - 151, - 155, - 32, - 33, + 2722729733807857233, + 7156973764189890273, + null, + null, + 58, + 81, + 58, + 81, + 11, + 18, true, - "1907", - "1907" + "(eg, a technical field)", + "(eg, a technical field)" ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/172", + "#/texts/29", 1.0, - 8106352039693059414, - 189526913306248274, - 18446744073709551615, - 18446744073709551615, - 3, - 10, - 3, - 10, - 2, - 4, + 15441160910541487324, + 14197444882771576140, + null, + null, + 59, + 61, + 59, + 61, + 12, + 13, true, - "Paolo R", - "Paolo R" + "eg", + "eg" ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 4651508276868765576, "TEXT", - "#/texts/172", + "#/texts/29", 1.0, - 8106471247241844081, - 12829126084417792103, - 18446744073709551615, - 18446744073709551615, - 12, - 19, - 12, - 19, - 5, - 7, + 6630151693041027733, + 5469669854747800448, + null, + null, + 65, + 80, + 65, + 80, + 15, + 17, true, - "Marco P", - "Marco P" + "technical field", + "technical field" ], [ - "reference", - "author", - 13946275785662847920, + "sentence", + "proper", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 15356089124994678984, - 18000216761919637454, - 18446744073709551615, - 18446744073709551615, - 21, - 31, - 21, - 31, - 8, - 10, + 17629874561869362054, + 1364582601176274676, + null, + null, + 0, + 103, + 0, + 103, + 0, + 16, true, - "Floriana B", - "Floriana B" + "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components.", + "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components." ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 8106352035144611657, - 2775049790770760163, - 18446744073709551615, - 18446744073709551615, - 33, - 40, - 33, - 40, - 11, - 13, + 15868223159689591859, + 16370341749731323775, + null, + null, + 12, + 22, + 12, + 22, + 3, + 4, true, - "Peter S", - "Peter S" + "objectives", + "objectives" ], [ - "reference", - "author", - 13946275785662847920, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 6560601913145533820, - 12130024709208567744, - 18446744073709551615, - 18446744073709551615, - 42, - 51, - 42, - 51, - 14, - 17, + 7885245284142706193, + 4158653228934455880, + null, + null, + 40, + 54, + 40, + 54, + 7, + 9, true, - "Costas B.", - "Costas B." + "CPS implements", + "CPS implements" ], [ - "reference", - "citation-number", - 13946275785662847920, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 17767354399704235156, - 2787669627718018145, - 18446744073709551615, - 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 1520105468889282504, + 17108999815917583587, + null, + null, + 82, + 102, + 82, + 102, + 13, + 15, true, - "4", - "4" + "essential components", + "essential components" ], [ - "reference", - "container-title", - 13946275785662847920, + "sentence", + "proper", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 4292761212337338605, - 773134743697376497, - 18446744073709551615, - 18446744073709551615, - 177, - 245, - 177, - 245, - 38, - 48, + 3384851794162116958, + 6847462864995440661, + null, + null, + 104, + 371, + 104, + 371, + 16, + 63, true, - "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi", - "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi" + "The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them.", + "The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them." ], [ - "reference", - "location", - 13946275785662847920, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 16381206478137548706, - 9744551904329916157, - 18446744073709551615, - 18446744073709551615, - 247, - 253, - 247, - 253, - 49, - 52, + 3741141293805179509, + 17420802040208319620, + null, + null, + 108, + 123, + 108, + 123, + 17, + 19, true, - "UAE, :", - "UAE, :" + "first component", + "first component" ], [ - "reference", - "title", - 13946275785662847920, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 14371818679908732529, - 10294554605073457499, - 18446744073709551615, - 18446744073709551615, - 52, - 174, - 52, - 174, - 17, - 36, + 2704211529742541242, + 4905940686306094827, + null, + null, + 129, + 171, + 129, + 171, + 21, + 26, true, - "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019", - "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019" + "scalable Knowledge Graph creation pipeline", + "scalable Knowledge Graph creation pipeline" ], [ - "reference", - "url", - 13946275785662847920, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 7742135058095281026, - 17571544217117981683, - 18446744073709551615, - 18446744073709551615, - 257, - 268, - 257, - 268, - 54, - 59, + 389609625631325904, + 9256317306341982494, + null, + null, + 212, + 216, + 212, + 216, + 33, + 34, true, - "https://doi", - "https://doi" + "text", + "text" ], [ - "reference", - "url", - 13946275785662847920, + "term", + "enum-term-mark-3", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 14023706993569865773, - 12197548886916811054, - 18446744073709551615, - 18446744073709551615, - 270, - 291, - 270, - 291, - 60, - 67, + 16462824725023446153, + 13126993570789821262, + null, + null, + 218, + 235, + 218, + 235, + 35, + 38, true, - "org/10.2118/197610-MS", - "org/10.2118/197610-MS" + "tables and images", + "tables and images" ], [ - "reference", - "volume", - 13946275785662847920, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/172", + "#/texts/30", 1.0, - 15441160910541481982, - 13393763465685487585, - 18446744073709551615, - 18446744073709551615, - 253, - 255, - 253, - 255, - 52, - 53, + 16381206513098478539, + 15214895821358181557, + null, + null, + 218, + 224, + 218, + 224, + 35, + 36, true, - "10", - "10" + "tables", + "tables" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/173", + "#/texts/30", 1.0, - 3027248490321213074, - 16283814403211008850, - 18446744073709551615, - 18446744073709551615, - 3, - 14, - 3, - 14, - 2, - 4, + 16381206560620045048, + 15781754046980462859, + null, + null, + 229, + 235, + 229, + 235, + 37, + 38, true, - "Guillaume L", - "Guillaume L" + "images", + "images" ], [ - "reference", - "author", - 7693798302433367973, + "expression", + "word-concatenation", + 3052020526349962744, "TEXT", - "#/texts/173", + "#/texts/30", 1.0, - 14650310996645589292, - 14357325801323977565, - 18446744073709551615, - 18446744073709551615, - 16, - 24, - 16, - 24, - 5, - 7, + 5044385734724420019, + 8851830242204350949, + null, + null, + 244, + 260, + 244, + 260, + 39, + 46, true, - "Miguel B", - "Miguel B" + "state-of-the-art", + "state-of-the-art" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/173", + "#/texts/30", 1.0, - 6049415556904669075, - 4491667145265607561, - 18446744073709551615, - 18446744073709551615, - 26, - 35, - 26, - 35, - 8, - 10, + 329104161640023790, + 5803290172135587372, + null, + null, + 244, + 249, + 244, + 249, + 39, + 40, true, - "Sandeep S", - "Sandeep S" + "state", + "state" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/173", + "#/texts/30", 1.0, - 14650438760956024332, - 12941354247565292233, - 18446744073709551615, - 18446744073709551615, - 37, - 45, - 37, + 7522030051285448165, + 9292971916934787798, + null, + null, + 257, + 273, + 257, + 273, 45, - 11, - 13, + 47, true, - "Kazuya K", - "Kazuya K" + "art segmentation", + "art segmentation" ], [ - "reference", - "author", - 7693798302433367973, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/173", + "#/texts/30", 1.0, - 14650449385951782031, - 12018837533588020118, - 18446744073709551615, - 18446744073709551615, - 47, - 55, - 47, - 55, - 14, - 17, + 3070945404202872591, + 10669708669164076111, + null, + null, + 278, + 294, + 278, + 294, + 48, + 50, true, - "Chris D.", - "Chris D." + "natural language", + "natural language" ], [ - "reference", - "citation-number", - 7693798302433367973, + "parenthesis", + "round brackets", + 3052020526349962744, "TEXT", - "#/texts/173", + "#/texts/30", 1.0, - 17767354399704235157, - 9080683344301571175, - 18446744073709551615, - 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 329104053347765356, + 2109302919745639425, + null, + null, + 309, + 314, + 309, + 314, + 51, + 54, true, - "5", - "5" + "(NLU)", + "(NLU)" ], [ - "reference", - "author", - 3109792572574236398, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/174", + "#/texts/30", 1.0, - 12139207556299923335, - 12395232115938598978, - 18446744073709551615, - 18446744073709551615, - 3, - 16, - 3, - 16, - 2, - 5, + 12178341415896299941, + 15271888912268631188, + null, + null, + 310, + 313, + 310, + 313, + 52, + 53, true, - "Chiu Jason PC", - "Chiu Jason PC" + "NLU", + "NLU" ], [ - "reference", - "author", - 3109792572574236398, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/174", + "#/texts/30", 1.0, - 8106350848262626922, - 5052428205716655678, - 18446744073709551615, - 18446744073709551615, - 18, - 25, - 18, - 25, - 6, - 9, + 16381206567230470443, + 13599704792953880118, + null, + null, + 315, + 321, + 315, + 321, + 54, + 55, true, - "Eric N.", - "Eric N." + "models", + "models" ], [ - "reference", - "citation-number", - 3109792572574236398, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/174", + "#/texts/30", 1.0, - 17767354399704235158, - 2935027410945303089, - 18446744073709551615, - 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 8746927308312045639, + 6749934577204905868, + null, + null, + 326, + 342, + 326, + 342, + 56, + 58, true, - "6", - "6" + "extract entities", + "extract entities" ], [ - "reference", - "date", - 3109792572574236398, + "term", + "enum-term-mark-3", + 3052020526349962744, "TEXT", - "#/texts/174", + "#/texts/30", 1.0, - 389609625548777056, - 1668465275038003542, - 18446744073709551615, - 18446744073709551615, - 87, - 91, - 87, - 91, - 20, - 21, + 13335488353876392384, + 2597733537392511997, + null, + null, + 334, + 360, + 334, + 360, + 57, + 60, true, - "2016", - "2016" + "entities and relationships", + "entities and relationships" ], [ - "reference", - "journal", - 3109792572574236398, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/174", + "#/texts/30", 1.0, - 389609625541773713, - 1712767977156820574, - 18446744073709551615, - 18446744073709551615, - 81, - 85, - 81, + 8279380567349713241, + 11473550069815089395, + null, + null, + 347, + 360, + 347, + 360, + 59, + 60, + true, + "relationships", + "relationships" + ], + [ + "sentence", + "proper", + 3052020526349962744, + "TEXT", + "#/texts/30", + 1.0, + 7829367821776224855, + 12429790798463300743, + null, + null, + 372, + 497, + 372, + 497, + 63, 85, - 18, - 19, true, - "TACL", - "TACL" + "The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time.", + "The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time." ], [ - "reference", - "title", - 3109792572574236398, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/174", + "#/texts/30", 1.0, - 16636370883913883252, - 5810162511985509685, - 18446744073709551615, - 18446744073709551615, - 26, - 79, - 26, - 79, - 9, - 17, + 864107477833444286, + 4664743449195732093, + null, + null, + 376, + 392, + 376, + 392, + 64, + 66, true, - "Named entity recognition with bidirectional LSTM-CNNs", - "Named entity recognition with bidirectional LSTM-CNNs" + "second component", + "second component" ], [ - "reference", - "title", - 3109792572574236398, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/174", + "#/texts/30", 1.0, - 9584872678510603869, - 10893893406063870923, - 18446744073709551615, - 18446744073709551615, - 91, - 101, - 91, - 101, - 21, - 25, + 15441160910541480204, + 11436442094831901011, + null, + null, + 412, + 414, + 412, + 414, + 69, + 70, true, - ";4:357-370", - ";4:357-370" + "KG", + "KG" ], [ - "reference", - "author", - 8111170387462350170, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/175", + "#/texts/30", 1.0, - 6611312511369759405, - 3019524304480366334, - 18446744073709551615, - 18446744073709551615, - 3, - 12, - 3, - 12, - 2, - 4, + 329104159157820437, + 6168600621351033593, + null, + null, + 425, + 430, + 425, + 430, + 72, + 73, true, - "Matthew H", - "Matthew H" + "users", + "users" ], [ - "reference", - "author", - 8111170387462350170, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/175", + "#/texts/30", 1.0, - 8106350362383531053, - 10877267985434630613, - 18446744073709551615, - 18446744073709551615, - 14, - 21, - 14, - 21, - 5, - 8, + 7076268937724050913, + 2952839948443340364, + null, + null, + 442, + 454, + 442, + 454, + 75, + 77, true, - "Ines M.", - "Ines M." + "deep queries", + "deep queries" ], [ - "reference", - "citation-number", - 8111170387462350170, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/175", + "#/texts/30", 1.0, - 17767354399704235159, - 17892509173094146701, - 18446744073709551615, - 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 1325639643510008878, + 14062961483119642395, + null, + null, + 459, + 483, + 459, + 483, + 78, + 81, true, - "7", - "7" + "advanced graph analytics", + "advanced graph analytics" ], [ - "reference", - "date", - 8111170387462350170, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/175", + "#/texts/30", 1.0, - 389609625548777057, - 14192492111179186414, - 18446744073709551615, - 18446744073709551615, - 151, - 155, - 151, - 155, - 28, - 29, + 6165973182635301010, + 2523980927240404445, + null, + null, + 487, + 496, + 487, + 496, + 82, + 84, true, - "2017", - "2017" + "real time", + "real time" ], [ - "reference", - "editor", - 8111170387462350170, + "numval", + "ival", + 3052020526349962744, "TEXT", - "#/texts/175", + "#/texts/30", 1.0, - 5944998866513528822, - 6604265927490760522, - 18446744073709551615, - 18446744073709551615, - 140, - 149, - 140, - 149, - 25, - 27, + 17767354399704235162, + 4099649421554807498, + null, + null, + 498, + 499, + 498, + 499, + 85, + 86, true, - "To appear", - "To appear" + "2", + "2" ], [ - "reference", - "title", - 8111170387462350170, + "sentence", + "proper", + 3052020526349962744, "TEXT", - "#/texts/175", + "#/texts/30", 1.0, - 8673657110667713983, - 2132423457048291450, - 18446744073709551615, - 18446744073709551615, - 22, - 138, - 22, - 138, - 8, - 24, + 854837096675760532, + 7651946466094830673, + null, + null, + 500, + 647, + 500, + 647, + 86, + 108, true, - "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing", - "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing" + "This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", + "This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration." ], [ - "reference", - "author", - 14682702346227170925, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/176", + "#/texts/30", 1.0, - 6627095272342846459, - 8960025720845820047, - 18446744073709551615, - 18446744073709551615, - 3, - 12, - 3, - 12, - 2, - 4, + 13015591071425028695, + 16405170400950667634, + null, + null, + 548, + 570, + 548, + 570, + 94, + 97, true, - "Magoon LB", - "Magoon LB" + "optimized graph engine", + "optimized graph engine" ], [ - "reference", - "author", - 14682702346227170925, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/176", + "#/texts/30", 1.0, - 6563582333827106756, - 4026322596752919867, - 18446744073709551615, - 18446744073709551615, - 14, - 23, - 14, - 23, - 5, - 7, + 13240311013633905449, + 13899855425573318778, + null, + null, + 608, + 620, + 608, + 620, + 102, + 103, true, - "Hudson TL", - "Hudson TL" + "requirements", + "requirements" ], [ - "reference", - "author", - 14682702346227170925, + "term", + "single-term", + 3052020526349962744, "TEXT", - "#/texts/176", + "#/texts/30", 1.0, - 1612814864176813785, - 12195293078214673428, - 18446744073709551615, - 18446744073709551615, - 25, - 35, - 25, - 35, - 8, - 11, + 13671659409933113155, + 3777936483828020599, + null, + null, + 625, + 646, + 625, + 646, + 104, + 107, true, - "Peters KE.", - "Peters KE." + "deep data exploration", + "deep data exploration" ], [ - "reference", - "citation-number", - 14682702346227170925, + "sentence", + "proper", + 6725501529910185390, "TEXT", - "#/texts/176", + "#/texts/31", 1.0, - 17767354399704235152, - 15651484829649486928, - 18446744073709551615, - 18446744073709551615, + 6831551111511447609, + 13413788254932797194, + null, + null, 0, - 1, + 200, 0, - 1, + 200, 0, - 1, + 35, true, - "8", - "8" + "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries.", + "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries." ], [ - "reference", - "date", - 14682702346227170925, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/176", + "#/texts/31", 1.0, - 329104147695665975, - 7749771140976442, - 18446744073709551615, - 18446744073709551615, - 163, - 168, - 163, - 168, - 38, + 12779036928191531604, + 16622894821397688807, + null, + null, + 28, 40, + 28, + 40, + 6, + 8, true, - "2005;", - "2005;" + "CPS platform", + "CPS platform" ], [ - "reference", - "journal", - 14682702346227170925, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/176", + "#/texts/31", 1.0, - 14445748745948696227, - 6494504935180328364, - 18446744073709551615, - 18446744073709551615, - 139, - 161, - 139, - 161, - 32, - 37, + 7724009801520989273, + 7797950652455693225, + null, + null, + 64, + 81, + 64, + 81, + 12, + 14, true, - "Am Assoc Pet Geol Bull", - "Am Assoc Pet Geol Bull" + "cloud application", + "cloud application" ], [ - "reference", - "title", - 14682702346227170925, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/176", + "#/texts/31", 1.0, - 10827383077041810226, - 7289787549141850214, - 18446744073709551615, - 18446744073709551615, - 36, - 52, - 36, - 52, - 11, - 16, + 16998720417278708113, + 16159794011975202711, + null, + null, + 121, + 150, + 121, + 150, + 20, + 25, true, - "Egret-Hibernia(!", - "Egret-Hibernia(!" + "multiple real-world scenarios", + "multiple real-world scenarios" ], [ - "reference", - "title", - 14682702346227170925, + "expression", + "word-concatenation", + 6725501529910185390, "TEXT", - "#/texts/176", + "#/texts/31", 1.0, - 8991166294068381652, - 13146587142049422219, - 18446744073709551615, - 18446744073709551615, - 55, - 137, - 55, - 137, - 18, - 31, + 15984801488078789848, + 14766777380716059078, + null, + null, + 130, + 140, + 130, + 140, + 21, + 24, true, - "a significant petroleum system, northern Grand Banks area, offshore eastern Canada", - "a significant petroleum system, northern Grand Banks area, offshore eastern Canada" + "real-world", + "real-world" ], [ - "reference", - "volume", - 14682702346227170925, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/176", + "#/texts/31", 1.0, - 12994571832648066926, - 17152242518570841800, - 18446744073709551615, - 18446744073709551615, - 168, - 183, - 168, - 183, - 40, - 46, + 10788814978233814896, + 9709242714425521456, + null, + null, + 154, + 170, + 154, + 170, + 26, + 28, true, - "89(9):1203-1237", - "89(9):1203-1237" + "material science", + "material science" ], [ "numval", - "year", - 18391264192891079539, + "ival", + 6725501529910185390, "TEXT", - "#/texts/177", + "#/texts/31", 1.0, - 389609625548777262, - 8826555294676663632, - 18446744073709551615, - 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 17767354399704235163, + 14253331712813347451, + null, + null, + 171, + 172, + 171, + 172, + 28, + 29, true, - "2020", - "2020" + "3", + "3" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "enum-term-mark-2", + 6725501529910185390, "TEXT", - "#/texts/177", + "#/texts/31", 1.0, - 389609625548777251, - 8826555296349648778, - 18446744073709551615, - 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 9418848057117014737, + 18338967318945171834, + null, + null, + 177, + 188, + 177, + 188, + 30, + 33, true, - "2023", - "2023" + "oil and gas", + "oil and gas" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/177", + "#/texts/31", 1.0, - 8104408072666212335, - 13552219042525319352, - 18446744073709551615, - 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 12178341415895623363, + 8134859084711314461, + null, + null, + 177, + 180, + 177, + 180, + 30, + 31, true, - "10.1002", - "10.1002" + "oil", + "oil" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 6725501529910185390, "TEXT", - "#/texts/177", + "#/texts/31", 1.0, - 389609625548868096, - 8826558551385119058, - 18446744073709551615, - 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 9846194482272547581, + 8008602840197678050, + null, + null, + 185, + 199, + 185, + 199, + 32, + 34, true, - "2.20", - "2.20" + "gas industries", + "gas industries" ], [ "numval", "ival", - 18391264192891079539, + 6725501529910185390, "TEXT", - "#/texts/177", + "#/texts/31", 1.0, - 14654386914267794441, - 12796143052106760105, - 18446744073709551615, - 18446744073709551615, + 17767354399704235156, + 14253331712656803661, + null, + null, + 201, + 202, + 201, + 202, + 35, + 36, + true, + "4", + "4" + ], + [ + "sentence", + "proper", + 14814111183601762276, + "TEXT", + "#/texts/32", + 1.0, + 10957561452305435035, + 14135301223768703134, + null, + null, 0, - 8, + 140, 0, - 8, + 140, 0, - 1, + 26, true, - "26895595", - "26895595" + "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS.", + "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS." ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/177", + "#/texts/32", 1.0, - 17767354399704235162, - 7753390158484899261, - 18446744073709551615, - 18446744073709551615, + 6165970943308474352, + 5673487969622609676, + null, + null, + 7, 16, - 17, + 7, 16, - 17, - 4, - 5, + 2, + 3, true, - "2", - "2" + "remainder", + "remainder" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/177", + "#/texts/32", 1.0, - 15441160910541481791, - 3518619573290839093, - 18446744073709551615, - 18446744073709551615, - 113, - 115, - 113, - 115, + 329104161668023890, + 17278239549203684871, + null, + null, + 25, 30, - 31, + 25, + 30, + 5, + 6, true, - "23", - "23" + "paper", + "paper" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/177", + "#/texts/32", 1.0, - 15441160910541481543, - 3518617976696906498, - 18446744073709551615, - 18446744073709551615, - 116, - 118, - 116, - 118, - 32, - 33, + 16381206568246674273, + 5139794507444996832, + null, + null, + 46, + 52, + 46, + 52, + 10, + 11, true, - "08", - "08" + "detail", + "detail" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/177", + "#/texts/32", 1.0, - 8536069645534292969, - 16063604623463467342, - 18446744073709551615, - 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 11289641655891678136, + 5471089380666220349, + null, + null, + 57, + 74, + 57, + 74, + 12, + 14, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "technical aspects", + "technical aspects" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/177", + "#/texts/32", 1.0, - 594099663775968682, - 14698211805947073928, - 18446744073709551615, - 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 1138422908050553065, + 14505894906473969976, + null, + null, + 79, + 101, + 79, + 101, + 15, + 17, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "implementation details", + "implementation details" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/177", + "#/texts/32", 1.0, - 1697220653346092555, - 8458710314769009562, - 18446744073709551615, - 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 3812062755894317903, + 11765129466643902740, + null, + null, + 113, + 128, + 113, + 128, + 20, + 22, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "main components", + "main components" ], [ - "reference", - "author", - 11430385775112165283, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/178", + "#/texts/32", 1.0, - 7087532328962869115, - 5488976721015347116, - 18446744073709551615, - 18446744073709551615, - 3, - 13, - 3, - 13, - 2, - 5, + 12178341415896222428, + 15683249918480756789, + null, + null, + 136, + 139, + 136, + 139, + 24, + 25, true, - "Estrada E.", - "Estrada E." + "CPS", + "CPS" ], [ - "reference", - "citation-number", - 11430385775112165283, + "sentence", + "proper", + 14814111183601762276, "TEXT", - "#/texts/178", + "#/texts/32", 1.0, - 17767354399704235153, - 10433678415276841389, - 18446744073709551615, - 18446744073709551615, - 0, - 1, - 0, - 1, - 0, - 1, + 13779837797648362784, + 3604173677770761086, + null, + null, + 141, + 239, + 141, + 239, + 26, + 46, true, - "9", - "9" + "In section 2, we present in depth how the platform extracts facts from corpora at a massive scale.", + "In section 2, we present in depth how the platform extracts facts from corpora at a massive scale." ], [ - "reference", - "date", - 11430385775112165283, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/178", + "#/texts/32", 1.0, - 8104407400303630267, - 3516783299715161152, - 18446744073709551615, - 18446744073709551615, - 67, - 74, - 67, - 74, - 15, - 18, + 8106478708629288965, + 7671933616087053356, + null, + null, + 144, + 151, + 144, + 151, + 27, + 28, true, - "2005;71", - "2005;71" + "section", + "section" ], [ - "reference", - "journal", - 11430385775112165283, + "numval", + "ival", + 14814111183601762276, "TEXT", - "#/texts/178", + "#/texts/32", 1.0, - 1821145667706451373, - 6349148037602643636, - 18446744073709551615, - 18446744073709551615, - 55, - 65, - 55, - 65, - 11, - 14, + 17767354399704235162, + 3186926300182333312, + null, + null, + 152, + 153, + 152, + 153, + 28, + 29, true, - "Phys Rev E", - "Phys Rev E" + "2", + "2" ], [ - "reference", - "title", - 11430385775112165283, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/178", + "#/texts/32", 1.0, - 10002059539925749429, - 4038144589619849267, - 18446744073709551615, - 18446744073709551615, - 14, - 53, - 14, - 53, - 5, - 10, + 329104162100250438, + 17070642353424787571, + null, + null, + 169, + 174, + 169, + 174, + 33, + 34, true, - "Subgraph centrality in complex networks", - "Subgraph centrality in complex networks" + "depth", + "depth" ], [ - "reference", - "volume", - 11430385775112165283, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/178", + "#/texts/32", 1.0, - 6573854687835318787, - 906292219904540950, - 18446744073709551615, - 18446744073709551615, - 75, - 84, - 75, - 84, - 19, - 23, + 14814125365076808131, + 17530184478432761737, + null, + null, + 183, + 191, + 183, + 191, + 36, + 37, true, - "5):056103", - "5):056103" + "platform", + "platform" ], [ - "reference", - "author", - 5825495964576843004, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 2628812302410383486, - 8225541491002394036, - 18446744073709551615, - 18446744073709551615, - 4, - 19, - 4, - 19, - 2, - 4, + 329104161809952077, + 17277977825643184942, + null, + null, + 201, + 206, + 201, + 206, + 38, + 39, true, - "Estrada Ernesto", - "Estrada Ernesto" + "facts", + "facts" ], [ - "reference", - "author", - 5825495964576843004, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 17728567422753594500, - 4401840231895103727, - 18446744073709551615, - 18446744073709551615, - 21, - 38, - 21, - 38, - 5, - 9, + 8106398483106473371, + 590821573198333812, + null, + null, + 212, + 219, + 212, + 219, + 40, + 41, true, - "Higham Desmond J.", - "Higham Desmond J." + "corpora", + "corpora" ], [ - "reference", - "citation-number", - 5825495964576843004, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 15441160910541481982, - 2952327273286615865, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 15616229620328161347, + 13533638390690607221, + null, + null, + 225, + 238, + 225, + 238, + 43, + 45, true, - "10", - "10" + "massive scale", + "massive scale" ], [ - "reference", - "date", - 5825495964576843004, + "sentence", + "improper", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 389609625548777062, - 8937154938925173833, - 18446744073709551615, - 18446744073709551615, - 40, - 44, - 40, - 44, - 10, - 11, + 7914280693915181635, + 14854893084684010129, + null, + null, + 240, + 346, + 240, + 346, + 46, + 68, true, - "2010", - "2010" + "In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", + "In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient" ], [ - "reference", - "journal", - 5825495964576843004, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 745633759305567859, - 2105664067016610109, - 18446744073709551615, - 18446744073709551615, - 47, - 112, + 8106478708629288965, + 7671933616087044621, + null, + null, + 243, + 250, + 243, + 250, 47, - 112, - 13, - 22, + 48, true, - "Network Properties Revealed through Matrix Functions. SIAM Review", - "Network Properties Revealed through Matrix Functions. SIAM Review" + "section", + "section" ], [ - "reference", - "pages", - 5825495964576843004, + "numval", + "ival", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 8104408773920978895, - 9147525378271823463, - 18446744073709551615, - 18446744073709551615, - 123, - 130, - 123, - 130, - 29, - 30, + 17767354399704235163, + 3186926300062863412, + null, + null, + 251, + 252, + 251, + 252, + 48, + 49, true, - "696-714", - "696-714" + "3", + "3" ], [ - "reference", - "url", - 5825495964576843004, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 16159594323378820687, - 15692242274322104012, - 18446744073709551615, - 18446744073709551615, - 132, - 167, - 132, - 167, - 31, - 44, + 16381206568246674273, + 5139794507445024315, + null, + null, + 265, + 271, + 265, + 271, + 53, + 54, true, - "http://dx.doi.org/10.1137/090761070", - "http://dx.doi.org/10.1137/090761070" + "detail", + "detail" ], [ - "reference", - "volume", - 5825495964576843004, + "term", + "single-term", + 14814111183601762276, "TEXT", - "#/texts/179", + "#/texts/32", 1.0, - 15441160910541486331, - 2952320863259255438, - 18446744073709551615, - 18446744073709551615, - 114, - 116, - 114, - 116, - 23, - 24, + 7076268937724050913, + 11092663505100283305, + null, + null, + 285, + 297, + 285, + 297, + 56, + 58, true, - "52", - "52" + "deep queries", + "deep queries" ], [ - "reference", - "volume", - 5825495964576843004, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/179", + "#/texts/33", 1.0, - 17767354399704235156, - 8049906977590018916, - 18446744073709551615, - 18446744073709551615, - 119, - 120, - 119, - 120, - 26, - 27, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, true, - "4", - "4" + "26895595", + "26895595" ], [ - "numval", - "year", - 5698421097735371040, + "sentence", + "improper", + 18391264192891079539, "TEXT", - "#/texts/180", + "#/texts/33", 1.0, - 389609625548777055, - 1517668227262464254, - 18446744073709551615, - 18446744073709551615, - 45, - 49, - 45, - 49, - 9, - 10, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, true, - "2019", - "2019" + ", 2020, 2,", + ", 2020, 2," ], [ "numval", - "fval", - 5698421097735371040, + "year", + 18391264192891079539, "TEXT", - "#/texts/180", + "#/texts/33", 1.0, - 12178341415896427355, - 7596226314134098818, - 18446744073709551615, - 18446744073709551615, - 40, - 43, - 40, - 43, - 7, - 8, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, true, - "1.0", - "1.0" + "2020", + "2020" ], [ "numval", "ival", - 5698421097735371040, + 18391264192891079539, "TEXT", - "#/texts/180", + "#/texts/33", 1.0, - 15441160910541481983, - 11293846485728944316, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, true, - "11", - "11" + "2", + "2" ], [ - "reference", - "author", - 5870535063942256428, + "sentence", + "proper", + 18391264192891079539, "TEXT", - "#/texts/181", + "#/texts/33", 1.0, - 15754713894443025139, - 17869835566751337591, - 18446744073709551615, - 18446744073709551615, - 4, - 15, - 4, - 15, - 2, - 4, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, true, - "TigerGraph.", - "TigerGraph." + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." ], [ - "reference", - "citation-number", - 5870535063942256428, + "link", + "url", + 18391264192891079539, "TEXT", - "#/texts/181", + "#/texts/33", 1.0, - 15441160910541481976, - 12703724519968684238, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, true, - "12", - "12" + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," ], [ - "reference", - "date", - 5870535063942256428, + "expression", + "wtoken-concatenation", + 18391264192891079539, "TEXT", - "#/texts/181", + "#/texts/33", 1.0, - 389609625548777054, - 3194806985827377522, - 18446744073709551615, - 18446744073709551615, - 47, - 51, - 47, - 51, - 11, - 12, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, true, - "2018", - "2018" + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" ], [ - "reference", - "title", - 5870535063942256428, + "link", + "doi", + 18391264192891079539, "TEXT", - "#/texts/181", + "#/texts/33", 1.0, - 17475892521501552303, - 8529795867214537154, - 18446744073709551615, - 18446744073709551615, - 16, - 45, - 16, - 45, - 4, - 10, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, true, - "Real-Time Deep Link Analytics", - "Real-Time Deep Link Analytics" + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," ], [ - "reference", - "author", - 18196767266655606709, + "numval", + "fval", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 14652280730090715542, - 9368048166047908224, - 18446744073709551615, - 18446744073709551615, - 4, - 12, - 4, - 12, - 2, - 4, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, true, - "Jeremy K", - "Jeremy K" + "10.1002", + "10.1002" ], [ - "reference", - "author", - 18196767266655606709, + "numval", + "fval", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 8106396242733918714, - 2646308426186848374, - 18446744073709551615, - 18446744073709551615, - 14, - 21, - 14, - 21, - 5, - 8, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, true, - "John G.", - "John G." + "2.20", + "2.20" ], [ - "reference", - "citation-number", - 18196767266655606709, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 15441160910541481977, - 12462842527617278799, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, true, - "13", - "13" + "Wiley Online Library", + "Wiley Online Library" ], [ - "reference", - "date", - 18196767266655606709, + "parenthesis", + "square brackets", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 16381206542172555296, - 17521384641614480308, - 18446744073709551615, - 18446744073709551615, - 138, - 144, - 138, - 144, - 27, - 29, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, true, - "; 2011", - "; 2011" + "[23/08/2023]", + "[23/08/2023]" ], [ - "reference", - "journal", - 18196767266655606709, + "expression", + "wtoken-concatenation", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 1813266722082342225, - 593931840598100395, - 18446744073709551615, - 18446744073709551615, - 74, - 86, - 74, - 86, - 17, - 18, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, true, - "Philadelphia", - "Philadelphia" + "[23/08/2023]", + "[23/08/2023]" ], [ - "reference", - "publisher", - 18196767266655606709, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 12316905074950798954, - 2223929060632914124, - 18446744073709551615, - 18446744073709551615, - 92, - 138, - 92, - 138, - 21, - 27, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, true, - "Society for Industrial and Applied Mathematics", - "Society for Industrial and Applied Mathematics" + "23", + "23" ], [ - "reference", - "title", - 18196767266655606709, + "numval", + "ival", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 11539515714196318944, - 4409464707523225606, - 18446744073709551615, - 18446744073709551615, - 22, - 72, - 22, - 72, - 8, - 16, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, true, - "Graph Algorithms in the Language of Linear Algebra", - "Graph Algorithms in the Language of Linear Algebra" + "08", + "08" ], [ - "reference", - "volume", - 18196767266655606709, + "numval", + "year", + 18391264192891079539, "TEXT", - "#/texts/182", + "#/texts/33", 1.0, - 12178341415896290392, - 14083523807676346774, - 18446744073709551615, - 18446744073709551615, - 88, - 91, - 88, - 91, - 19, - 21, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, true, - "PA:", - "PA:" + "2023", + "2023" ], [ - "reference", - "author", - 3623403683642367845, + "sentence", + "improper", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 3893756947393595038, - 15910484170600691612, - 18446744073709551615, - 18446744073709551615, - 4, - 17, - 4, - 17, - 2, - 4, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, true, - "Kepner Jeremy", - "Kepner Jeremy" + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" ], [ - "reference", - "author", - 3623403683642367845, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 4638041857648041651, - 2139644705806385528, - 18446744073709551615, - 18446744073709551615, - 19, - 30, - 19, - 30, - 5, - 7, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, true, - "Bader David", - "Bader David" + "Terms", + "Terms" ], [ - "reference", - "author", - 3623403683642367845, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 9621725435760800320, - 4639858687526125642, - 18446744073709551615, - 18446744073709551615, - 32, - 47, - 32, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, 45, - 8, - 12, + 46, true, - "Bulu\u00e7 Ayd \u0131 n", - "Bulu\u00e7 Ayd \u0131 n" + "Conditions", + "Conditions" ], [ - "reference", - "author", - 3623403683642367845, + "parenthesis", + "round brackets", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 978039607314331382, - 9008054255178396141, - 18446744073709551615, - 18446744073709551615, - 49, - 61, - 47, - 59, - 13, - 15, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, true, - "Gilbert John", - "Gilbert John" + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" ], [ - "reference", - "author", - 3623403683642367845, + "link", + "url", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 10968707392751490476, - 11627993516556341660, - 18446744073709551615, - 18446744073709551615, - 63, - 78, - 61, - 76, - 16, - 18, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, true, - "Mattson Timothy", - "Mattson Timothy" + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" ], [ - "reference", - "author", - 3623403683642367845, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 3010219124533777340, - 3552467627404320563, - 18446744073709551615, - 18446744073709551615, - 80, - 98, - 78, - 96, - 19, - 21, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, true, - "Meyerhenke Henning", - "Meyerhenke Henning" + "Wiley Online Library", + "Wiley Online Library" ], [ - "reference", - "citation-number", - 3623403683642367845, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 15441160910541481978, - 9067685736347109846, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, true, - "14", - "14" + "rules", + "rules" ], [ - "reference", - "date", - 3623403683642367845, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 389609625548777059, - 3330964369910711146, - 18446744073709551615, - 18446744073709551615, - 100, - 104, - 98, - 102, - 22, - 23, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, true, - "2015", - "2015" + "use", + "use" ], [ - "reference", - "date", - 3623403683642367845, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 389609625548777059, - 3330964369910703397, - 18446744073709551615, - 18446744073709551615, - 240, - 244, - 238, - 242, - 61, - 62, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, true, - "2015", - "2015" + "OA articles", + "OA articles" ], [ - "reference", - "location", - 3623403683642367845, + "term", + "single-term", + 18391264192891079539, "TEXT", - "#/texts/183", + "#/texts/33", 1.0, - 9440834537675533739, - 6746478687441634720, - 18446744073709551615, - 18446744073709551615, - 107, - 143, - 105, - 141, - 25, - 33, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, true, - "Graphs, Matrices, and the GraphBLAS:", - "Graphs, Matrices, and the GraphBLAS:" + "applicable Creative Commons License", + "applicable Creative Commons License" ], [ - "reference", - "pages", - 3623403683642367845, + "numval", + "ival", + 4361549266681704196, "TEXT", - "#/texts/183", + "#/texts/34", 1.0, - 6573068860818606718, - 4687668980596472570, - 18446744073709551615, - 18446744073709551615, - 195, - 204, - 193, - 202, - 43, - 44, + 17767354399704235163, + 165380245946403556, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, true, - "2453-2462", - "2453-2462" + "3", + "3" ], [ - "reference", - "pages", - 3623403683642367845, + "sentence", + "improper", + 4361549266681704196, "TEXT", - "#/texts/183", + "#/texts/34", 1.0, - 16380805713218987920, - 9575999090851094928, - 18446744073709551615, - 18446744073709551615, - 245, - 251, - 243, - 249, - 63, - 64, + 15441160910541485670, + 10132017202982233095, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, true, - "05.353", - "05.353" + "of", + "of" ], [ - "reference", - "publisher", - 3623403683642367845, + "numval", + "ival", + 4361549266681704196, "TEXT", - "#/texts/183", + "#/texts/34", 1.0, - 10585062274889693433, - 394824704429372117, - 18446744073709551615, - 18446744073709551615, - 144, - 162, - 142, - 160, - 33, - 36, + 15441160910541481979, + 10132017072037949157, + null, + null, + 3, + 5, + 3, + 5, + 2, + 3, true, - "Seven Good Reasons", - "Seven Good Reasons" + "15", + "15" ], [ - "reference", - "publisher", - 3623403683642367845, + "sentence", + "improper", + 8043608144162608258, "TEXT", - "#/texts/183", + "#/texts/35", 1.0, - 11311803343161413167, - 2833609951174621747, - 18446744073709551615, - 18446744073709551615, - 164, - 189, - 162, - 187, - 37, - 40, + 15874830917445991279, + 14751652686275270830, + null, + null, + 0, + 12, + 0, + 12, + 0, + 3, true, - "Procedia Computer Science", - "Procedia Computer Science" + "way with our", + "way with our" ], [ - "reference", - "url", - 3623403683642367845, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/183", + "#/texts/35", 1.0, - 16959048237954323084, - 10596594611762835857, - 18446744073709551615, - 18446744073709551615, - 206, - 239, - 204, - 237, - 45, - 60, + 12178341415895525628, + 4396159551467569503, + null, + null, + 0, + 3, + 0, + 3, + 0, + 1, true, - "http://dx.doi.org/10.1016/j.procs", - "http://dx.doi.org/10.1016/j.procs" + "way", + "way" ], [ - "reference", - "volume", - 3623403683642367845, + "sentence", + "proper", + 8043608144162608258, "TEXT", - "#/texts/183", + "#/texts/35", 1.0, - 15441160910541486330, - 9067694506000682765, - 18446744073709551615, - 18446744073709551615, - 191, - 193, - 189, - 191, - 41, - 42, + 5042236808703360545, + 8650712924483655573, + null, + null, + 13, + 43, + 13, + 43, + 3, + 9, true, - "51", - "51" + "high-performance graph engine.", + "high-performance graph engine." ], [ - "reference", - "author", - 13936866850854297069, + "expression", + "word-concatenation", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 8106396252822508385, - 7971302054101082514, - 18446744073709551615, - 18446744073709551615, - 4, - 11, - 4, - 11, - 2, - 4, + 524264419207632938, + 10547923618079885832, + null, + null, + 13, + 29, + 13, + 29, + 3, + 6, true, - "Aydin B", - "Aydin B" + "high-performance", + "high-performance" ], [ - "reference", - "author", - 13936866850854297069, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 3367556578117774584, - 5704823584998723957, - 18446744073709551615, - 18446744073709551615, - 13, - 28, - 13, - 28, + 12049993933909531745, + 14197149878904628774, + null, + null, + 18, + 42, + 18, + 42, 5, - 9, + 8, true, - "Gilbert John R.", - "Gilbert John R." + "performance graph engine", + "performance graph engine" ], [ - "reference", - "citation-number", - 13936866850854297069, + "sentence", + "proper", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 15441160910541481979, - 10213682970367471311, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 13307765353894382590, + 13661934614468722351, + null, + null, + 44, + 153, + 44, + 153, + 9, + 31, true, - "15", - "15" + "Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud.", + "Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud." ], [ - "reference", - "date", - 13936866850854297069, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 329104147695662665, - 13454856964816440075, - 18446744073709551615, - 18446744073709551615, - 127, - 132, - 127, - 132, - 27, - 29, + 8106478708629288965, + 2690053722178369781, + null, + null, + 54, + 61, + 54, + 61, + 12, + 13, true, - "2011;", - "2011;" + "section", + "section" ], [ - "reference", - "journal", - 13936866850854297069, + "numval", + "ival", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 15067288891537767501, - 3357793480659482128, - 18446744073709551615, - 18446744073709551615, - 95, - 125, - 95, - 125, - 20, - 26, + 17767354399704235156, + 18342724908476302885, + null, + null, + 62, + 63, + 62, + 63, + 13, + 14, true, - "Int J High Perform Comput Appl", - "Int J High Perform Comput Appl" + "4", + "4" ], [ - "reference", - "title", - 13936866850854297069, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 6150328359964540652, - 10199114762007747151, - 18446744073709551615, - 18446744073709551615, - 29, - 93, - 29, - 93, - 9, + 16381206568246674273, + 9950902184857858955, + null, + null, + 84, + 90, + 84, + 90, 19, + 20, true, - "The combinatorial BLAS: design, implementation, and applications", - "The combinatorial BLAS: design, implementation, and applications" + "detail", + "detail" ], [ - "reference", - "volume", - 13936866850854297069, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 15441160910541481789, - 10213682972046866336, - 18446744073709551615, - 18446744073709551615, - 132, - 134, - 132, - 134, - 29, - 30, + 2703018952916355661, + 16475014154010855623, + null, + null, + 100, + 110, + 100, + 110, + 22, + 23, true, - "25", - "25" + "components", + "components" ], [ - "reference", - "volume", - 13936866850854297069, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/184", + "#/texts/35", 1.0, - 10114972591203837083, - 15828968969447743865, - 18446744073709551615, - 18446744073709551615, - 136, - 146, - 136, - 146, - 31, - 35, + 329104161517016668, + 8976287706705647369, + null, + null, + 147, + 152, + 147, + 152, + 29, + 30, true, - "4):496-509", - "4):496-509" + "cloud", + "cloud" ], [ - "reference", - "author", - 8497015665124263236, + "sentence", + "proper", + 8043608144162608258, "TEXT", - "#/texts/185", + "#/texts/35", 1.0, - 14652280730090715542, - 12791881049692147803, - 18446744073709551615, - 18446744073709551615, - 4, - 12, - 4, - 12, - 2, - 4, + 1047931557873137932, + 18017234894347992215, + null, + null, + 154, + 262, + 154, + 262, + 31, + 53, true, - "Jeremy K", - "Jeremy K" + "Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", + "Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy." ], [ - "reference", - "author", - 8497015665124263236, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/185", + "#/texts/35", 1.0, - 8106352035144611671, - 4513564816050590788, - 18446744073709551615, - 18446744073709551615, - 14, - 21, - 14, - 21, - 5, - 7, + 8106478708629288965, + 2690053722178360825, + null, + null, + 166, + 173, + 166, + 173, + 34, + 35, true, - "Peter A", - "Peter A" + "section", + "section" ], [ - "reference", - "author", - 8497015665124263236, + "numval", + "ival", + 8043608144162608258, "TEXT", - "#/texts/185", + "#/texts/35", 1.0, - 11373457542276896833, - 10633744312666392907, - 18446744073709551615, - 18446744073709551615, - 23, - 36, - 23, + 17767354399704235157, + 18342724908489108597, + null, + null, + 174, + 175, + 174, + 175, + 35, 36, - 8, - 11, true, - "Bader David A", - "Bader David A" + "5", + "5" ], [ - "reference", - "citation-number", - 8497015665124263236, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/185", + "#/texts/35", 1.0, - 15441160910541481860, - 13099555958800192769, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 4743061560066569517, + 9046325022279510283, + null, + null, + 192, + 207, + 192, + 207, + 40, + 42, true, - "16", - "16" + "complete system", + "complete system" ], [ - "reference", - "container-title", - 8497015665124263236, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/185", + "#/texts/35", 1.0, - 10709633855219206820, - 961925091352749103, - 18446744073709551615, - 18446744073709551615, - 88, - 102, - 88, - 102, - 21, - 24, + 12197753754046053748, + 545238828955449628, + null, + null, + 213, + 234, + 213, + 234, + 44, + 48, true, - "2016 IEEE HPEC", - "2016 IEEE HPEC" + "real world case study", + "real world case study" ], [ - "reference", - "date", - 8497015665124263236, + "term", + "single-term", + 8043608144162608258, "TEXT", - "#/texts/185", + "#/texts/35", 1.0, - 6573474049096193902, - 2260581871937703980, - 18446744073709551615, - 18446744073709551615, - 104, - 113, - 104, - 113, - 25, - 28, + 14650440612701450082, + 7631421264816179483, + null, + null, + 253, + 261, + 253, + 261, + 51, + 52, true, - "2016; 1-9", - "2016; 1-9" + "accuracy", + "accuracy" ], [ - "reference", - "title", - 8497015665124263236, + "numval", + "ival", + 7159467829896778939, "TEXT", - "#/texts/185", + "#/texts/36", 1.0, - 16641826418709048621, - 2282440200854755549, - 18446744073709551615, - 18446744073709551615, - 45, - 86, - 45, - 86, - 15, - 20, + 17767354399704235162, + 7924620771043007977, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, true, - "Mathematical foundations of the GraphBLAS", - "Mathematical foundations of the GraphBLAS" + "2", + "2" ], [ - "reference", - "author", - 15947529491299956047, + "sentence", + "improper", + 7159467829896778939, "TEXT", - "#/texts/186", + "#/texts/36", 1.0, - 14650296444613217893, - 2015187192231796797, - 18446744073709551615, - 18446744073709551615, + 7707525670076367550, + 550173016828544359, + null, + null, + 2, + 37, + 2, + 37, + 1, + 6, + true, + "| SCALABLE KNOWLEDGE GRAPH CREATION", + "| SCALABLE KNOWLEDGE GRAPH CREATION" + ], + [ + "term", + "single-term", + 7159467829896778939, + "TEXT", + "#/texts/36", + 1.0, + 8338193261817505168, + 7647354265926273138, + null, + null, 4, - 12, + 37, 4, - 12, + 37, 2, - 4, + 6, true, - "Ariful A", - "Ariful A" + "SCALABLE KNOWLEDGE GRAPH CREATION", + "SCALABLE KNOWLEDGE GRAPH CREATION" ], [ - "reference", - "author", - 15947529491299956047, + "sentence", + "proper", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 6611311853662317003, - 219996680584521934, - 18446744073709551615, - 18446744073709551615, - 14, - 23, - 14, + 12144997519516518537, + 9639908354679176796, + null, + null, + 0, + 124, + 0, + 124, + 0, 23, - 5, - 7, true, - "Mathias J", - "Mathias J" + "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges.", + "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges." ], [ - "reference", - "author", - 15947529491299956047, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 8106396252822508385, - 5214697480984905265, - 18446744073709551615, - 18446744073709551615, - 25, - 32, + 12178341415896222428, + 3018050375403736631, + null, + null, + 3, + 6, + 3, + 6, + 1, + 2, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", + 1.0, + 5877539623435777295, + 3070141646605830439, + null, + null, + 10, 25, - 32, - 8, 10, + 25, + 4, + 6, true, - "Aydin B", - "Aydin B" + "Knowledge Graph", + "Knowledge Graph" ], [ - "reference", - "author", - 15947529491299956047, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 1138450846564361539, - 13516232875802125645, - 18446744073709551615, - 18446744073709551615, - 34, - 46, - 34, - 46, + 2702984786539193186, + 2066105174092978753, + null, + null, + 42, + 52, + 42, + 52, + 10, 11, - 15, true, - "Ng Esmond G.", - "Ng Esmond G." + "collection", + "collection" ], [ - "reference", - "citation-number", - 15947529491299956047, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 15441160910541481861, - 5749903657566610070, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 14652256560445338257, + 14335368261363034099, + null, + null, + 56, + 64, + 56, + 64, + 12, + 13, true, - "17", - "17" + "entities", + "entities" ], [ - "reference", - "container-title", - 15947529491299956047, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 10701056912570859123, - 6872071652706022831, - 18446744073709551615, - 18446744073709551615, - 106, - 175, - 106, - 175, - 26, - 34, + 8279380567349713241, + 12534042586919924803, + null, + null, + 75, + 88, + 75, + 88, + 15, + 16, true, - "2017 IEEE International Parallel and Distributed Processing Symposium", - "2017 IEEE International Parallel and Distributed Processing Symposium" + "relationships", + "relationships" ], [ - "reference", - "container-title", - 15947529491299956047, + "term", + "enum-term-mark-3", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 329104161866629985, - 4498077561104002021, - 18446744073709551615, - 18446744073709551615, - 177, - 182, - 177, - 182, - 35, - 36, + 15863098611266611689, + 11938678268878922872, + null, + null, + 101, + 123, + 101, + 123, + 18, + 22, true, - "IPDPS", - "IPDPS" + "graphs nodes and edges", + "graphs nodes and edges" ], [ - "reference", - "date", - 15947529491299956047, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 7366731910384143591, - 4074534479596534226, - 18446744073709551615, - 18446744073709551615, - 185, - 196, - 185, - 196, - 38, - 41, + 2924937330842356899, + 7802493761985505696, + null, + null, + 101, + 113, + 101, + 113, + 18, + 20, true, - "2017: 22-31", - "2017: 22-31" + "graphs nodes", + "graphs nodes" ], [ - "reference", - "title", - 15947529491299956047, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/186", + "#/texts/37", 1.0, - 18143113072209505450, - 5317689214231344382, - 18446744073709551615, - 18446744073709551615, - 47, - 104, - 47, - 104, - 15, - 25, + 329104162186494203, + 15974017122495616980, + null, + null, + 118, + 123, + 118, + 123, + 21, + 22, true, - "The reverse Cuthill-McKee algorithm in distributed-memory", - "The reverse Cuthill-McKee algorithm in distributed-memory" + "edges", + "edges" ], [ - "reference", - "author", - 14843401725435831033, + "sentence", + "proper", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 9277063416399937233, - 9921862040524615824, - 18446744073709551615, - 18446744073709551615, - 4, - 14, - 4, - 14, - 2, - 4, + 15563824490297277995, + 17150277444298513036, + null, + null, + 125, + 167, + 125, + 167, + 23, + 32, true, - "Rukhsana S", - "Rukhsana S" + "Entities can have a wide variety of types.", + "Entities can have a wide variety of types." ], [ - "reference", - "author", - 14843401725435831033, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 8106479273814684994, - 12770854321018137055, - 18446744073709551615, - 18446744073709551615, - 16, - 23, - 16, + 14650436091620137967, + 17692238337199335254, + null, + null, + 125, + 133, + 125, + 133, 23, - 5, - 7, + 24, true, - "Anila U", - "Anila U" + "Entities", + "Entities" ], [ - "reference", - "author", - 14843401725435831033, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 16985962715048067011, - 772749724699858811, - 18446744073709551615, - 18446744073709551615, - 25, - 37, - 25, - 37, - 8, - 11, + 5069338760716920094, + 17809400713453098686, + null, + null, + 145, + 157, + 145, + 157, + 27, + 29, true, - "Chughtai IR.", - "Chughtai IR." + "wide variety", + "wide variety" ], [ - "reference", - "citation-number", - 14843401725435831033, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 15441160910541481862, - 17618650105274567067, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 329104159243796903, + 15885015617704145871, + null, + null, + 161, + 166, + 161, + 166, + 30, + 31, true, - "18", - "18" + "types", + "types" ], [ - "reference", - "date", - 14843401725435831033, + "sentence", + "proper", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 389609625548757410, - 18165604049296771030, - 18446744073709551615, - 18446744073709551615, - 88, - 92, - 88, - 92, - 19, - 20, + 12582210430540746378, + 14089013500727116382, + null, + null, + 168, + 262, + 168, + 262, + 32, + 49, true, - "2005", - "2005" + "A basic scenario includes types such as documents, document components, keywords, and authors.", + "A basic scenario includes types such as documents, document components, keywords, and authors." ], [ - "reference", - "date", - 14843401725435831033, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 389609625548757410, - 18165604049296772353, - 18446744073709551615, - 18446744073709551615, - 133, - 137, - 133, - 137, - 25, - 26, + 15408825885837354070, + 15053329185263397097, + null, + null, + 170, + 184, + 170, + 184, + 33, + 35, true, - "2005", - "2005" + "basic scenario", + "basic scenario" ], [ - "reference", - "pages", - 14843401725435831033, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 12178341415896427411, - 9464229838695116121, - 18446744073709551615, - 18446744073709551615, - 138, - 141, - 138, - 141, - 26, - 27, + 329104159243796903, + 15885015617704135557, + null, + null, + 194, + 199, + 194, + 199, + 36, + 37, true, - "1-7", - "1-7" + "types", + "types" ], [ - "reference", - "title", - 14843401725435831033, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 12931819230736677229, - 14856363282836835505, - 18446744073709551615, - 18446744073709551615, - 38, - 86, - 38, - 86, - 11, - 18, + 6167933651658664291, + 7313720567483528692, + null, + null, + 208, + 217, + 208, + 217, + 39, + 40, true, - "Review of storage techniques for sparse matrices", - "Review of storage techniques for sparse matrices" + "documents", + "documents" ], [ - "reference", - "title", - 14843401725435831033, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/187", + "#/texts/37", 1.0, - 1320248361117940781, - 5199561905441189481, - 18446744073709551615, - 18446744073709551615, - 93, - 131, - 93, - 131, - 20, - 24, + 17524405716142769441, + 12187828972523501476, + null, + null, + 219, + 238, + 219, + 238, + 41, + 43, true, - "Pakistan Section Multitopic Conference", - "Pakistan Section Multitopic Conference" + "document components", + "document components" ], [ - "reference", - "author", - 16676439669743530711, + "term", + "enum-term-mark-3", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 14638563242508500832, - 2752940376292253295, - 18446744073709551615, - 18446744073709551615, - 4, - 12, - 4, - 12, - 2, - 4, + 8274047561994409760, + 2528562038833681719, + null, + null, + 228, + 261, + 228, + 261, + 42, + 48, true, - "Welte DH", - "Welte DH" + "components, keywords, and authors", + "components, keywords, and authors" ], [ - "reference", - "author", - 16676439669743530711, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 1317380608127935415, - 8792991722627090893, - 18446744073709551615, - 18446744073709551615, - 14, - 25, - 14, - 25, - 5, - 7, + 14634111734655409321, + 3345538017068759698, + null, + null, + 240, + 248, + 240, + 248, + 44, + 45, true, - "Horsfield B", - "Horsfield B" + "keywords", + "keywords" ], [ - "reference", - "author", - 16676439669743530711, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 4172892994592792372, - 2160694788416159558, - 18446744073709551615, - 18446744073709551615, - 27, - 46, - 27, - 46, - 8, - 12, + 8106397759446161562, + 18033240504910693308, + null, + null, + 254, + 261, + 254, + 261, + 47, + 48, true, - "Baker DR. Petroleum", - "Baker DR. Petroleum" + "authors", + "authors" ], [ - "reference", - "author", - 16676439669743530711, + "sentence", + "proper", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 5561358046097680519, - 15395766198352277458, - 18446744073709551615, - 18446744073709551615, - 51, - 67, - 51, - 67, - 13, - 16, + 9972047232259612032, + 3846286086726380255, + null, + null, + 263, + 484, + 263, + 484, + 49, + 93, true, - "Basin Evolution:", - "Basin Evolution:" + "In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration.", + "In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration." ], [ - "reference", - "citation-number", - 16676439669743530711, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 15441160910541481863, - 8099163979199984832, - 18446744073709551615, - 18446744073709551615, - 0, - 2, - 0, - 2, - 0, - 1, + 14650447861280948245, + 10555199694781207120, + null, + null, + 266, + 274, + 266, + 274, + 50, + 51, true, - "19", - "19" + "addition", + "addition" ], [ - "reference", - "date", - 16676439669743530711, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 16381206542172924133, - 9981189962990674937, - 18446744073709551615, - 18446744073709551615, - 169, - 175, - 169, - 175, - 33, - 35, + 15130402071104315819, + 14505129180480892684, + null, + null, + 294, + 308, + 294, + 308, + 56, + 58, true, - "; 1997", - "; 1997" + "specific types", + "specific types" ], [ - "reference", - "journal", - 16676439669743530711, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 2422127895824933260, - 7556925222758925531, - 18446744073709551615, - 18446744073709551615, - 106, - 133, - 106, - 133, - 21, - 26, + 3534273810487275626, + 4373385647668922427, + null, + null, + 324, + 333, + 324, + 333, + 61, + 62, true, - "Geology, and Basin Modeling", - "Geology, and Basin Modeling" + "verticals", + "verticals" ], [ - "reference", - "location", - 16676439669743530711, + "term", + "enum-term-mark-3", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 11741555610443867475, - 15927342063432766432, - 18446744073709551615, - 18446744073709551615, - 135, - 153, - 135, - 153, - 27, - 30, + 15083712120508435047, + 15480403097954548676, + null, + null, + 343, + 367, + 343, + 367, + 65, + 68, true, - "Berlin Heidelberg:", - "Berlin Heidelberg:" + "materials and properties", + "materials and properties" ], [ - "reference", - "publisher", - 16676439669743530711, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 3197612152806046883, - 2512966040017790311, - 18446744073709551615, - 18446744073709551615, - 154, - 169, - 154, - 169, - 30, - 33, + 6179392753523812130, + 16868072188025352035, + null, + null, + 343, + 352, + 343, + 352, + 65, + 66, true, - "Springer-Verlag", - "Springer-Verlag" + "materials", + "materials" ], [ - "reference", - "title", - 16676439669743530711, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/188", + "#/texts/37", 1.0, - 1197865287651023688, - 134234943361095181, - 18446744073709551615, - 18446744073709551615, - 68, - 104, + 14088628410271132453, + 1983771389005185922, + null, + null, + 357, + 367, + 357, + 367, + 67, 68, - 104, - 16, - 20, true, - "Insights from Petroleum Geochemistry", - "Insights from Petroleum Geochemistry" + "properties", + "properties" ], [ - "reference", - "author", - 2986547206451163051, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/189", + "#/texts/37", 1.0, - 8106351306870445011, - 7231860053894851093, - 18446744073709551615, - 18446744073709551615, - 37, - 44, - 37, - 44, - 9, - 11, + 10788814978233814896, + 1701325665325828957, + null, + null, + 371, + 387, + 371, + 387, + 69, + 71, true, - "Dolfi M", - "Dolfi M" + "material science", + "material science" ], [ - "reference", - "author", - 2986547206451163051, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/189", + "#/texts/37", 1.0, - 8106479197488776816, - 6022123083747398357, - 18446744073709551615, - 18446744073709551615, - 46, - 53, - 46, - 53, - 12, - 15, + 9663226904190425014, + 11862188099935093855, + null, + null, + 392, + 407, + 392, + 407, + 73, + 75, true, - "Auer C.", - "Auer C." + "geological ages", + "geological ages" ], [ - "reference", - "date", - 2986547206451163051, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/189", + "#/texts/37", 1.0, - 12668563530344603848, - 14820206483220239473, - 18446744073709551615, - 18446744073709551615, - 173, - 183, - 173, - 183, - 35, - 41, + 16064217528453934834, + 2928977078579581381, + null, + null, + 409, + 419, + 409, + 419, + 76, + 77, true, - "2020;1:e20", - "2020;1:e20" + "formations", + "formations" ], [ - "reference", - "journal", - 2986547206451163051, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/189", + "#/texts/37", 1.0, - 10525943314116263182, - 11312474291607917611, - 18446744073709551615, - 18446744073709551615, - 153, - 171, - 153, - 171, - 31, - 34, + 329104161637315394, + 15906833325474937465, + null, + null, + 421, + 426, + 421, + 426, + 78, + 79, true, - "Applied AI Letters", - "Applied AI Letters" + "rocks", + "rocks" ], [ - "reference", - "title", - 2986547206451163051, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/189", + "#/texts/37", 1.0, - 912378836411683307, - 17710224191321636054, - 18446744073709551615, - 18446744073709551615, - 0, - 35, - 0, - 35, - 0, - 8, + 14638289846375411086, + 13515658020381275329, + null, + null, + 428, + 436, + 428, + 436, + 80, + 81, true, - "How to cite this article: Staar PWJ", - "How to cite this article: Staar PWJ" + "minerals", + "minerals" ], [ - "reference", - "title", - 2986547206451163051, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/189", + "#/texts/37", 1.0, - 4375081646508065875, - 5872894694925809811, - 18446744073709551615, - 18446744073709551615, - 54, - 151, - 54, - 151, - 15, - 30, + 14120356269929906404, + 7929205159710255559, + null, + null, + 438, + 448, + 438, + 448, + 82, + 83, true, - "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", - "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora" + "structures", + "structures" ], [ - "reference", - "url", - 2986547206451163051, + "expression", + "common", + 5617240156952377, "TEXT", - "#/texts/189", + "#/texts/37", 1.0, - 751450063096904044, - 2161551171101074414, - 18446744073709551615, - 18446744073709551615, - 185, - 216, - 185, - 216, - 42, - 54, + 12178341415895450733, + 3018062721998632434, + null, + null, + 450, + 454, + 450, + 454, + 84, + 86, true, - "https://doi.org/10.1002/ail2.20", - "https://doi.org/10.1002/ail2.20" + "etc", + "etc." ], [ - "numval", - "year", - 18391264192891079539, + "term", + "enum-term-mark-2", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 389609625548777262, - 8826555294676663632, - 18446744073709551615, - 18446744073709551615, - 10, - 14, - 10, - 14, - 2, - 3, + 5515747999597331548, + 9117859149955612445, + null, + null, + 460, + 483, + 460, + 483, + 88, + 92, true, - "2020", - "2020" + "oil and gas exploration", + "oil and gas exploration" ], [ - "numval", - "year", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 389609625548777251, - 8826555296349648778, - 18446744073709551615, - 18446744073709551615, - 119, - 123, - 119, - 123, - 34, - 35, + 12178341415895623363, + 3018073362770496593, + null, + null, + 460, + 463, + 460, + 463, + 88, + 89, true, - "2023", - "2023" + "oil", + "oil" ], [ - "numval", - "fval", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 8104408072666212335, - 13552219042525319352, - 18446744073709551615, - 18446744073709551615, - 71, - 78, - 71, - 78, - 20, - 21, + 10692163443301812358, + 3113975335211030761, + null, + null, + 468, + 483, + 468, + 483, + 90, + 92, true, - "10.1002", - "10.1002" + "gas exploration", + "gas exploration" ], [ - "numval", - "fval", - 18391264192891079539, + "sentence", + "proper", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 389609625548868096, - 8826558551385119058, - 18446744073709551615, - 18446744073709551615, - 82, - 86, - 82, - 86, - 23, - 24, + 794739137233286117, + 9994141964316778148, + null, + null, + 485, + 551, + 485, + 551, + 93, + 104, true, - "2.20", - "2.20" + "Relationships in the KG are strictly defined between the entities.", + "Relationships in the KG are strictly defined between the entities." ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 14654386914267794441, - 12796143052106760105, - 18446744073709551615, - 18446744073709551615, - 0, - 8, - 0, - 8, - 0, - 1, + 1808270638656316647, + 13986132968258321440, + null, + null, + 485, + 498, + 485, + 498, + 93, + 94, true, - "26895595", - "26895595" + "Relationships", + "Relationships" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 17767354399704235162, - 7753390158484899261, - 18446744073709551615, - 18446744073709551615, - 16, - 17, - 16, - 17, - 4, - 5, + 15441160910541480204, + 6094061681036227158, + null, + null, + 506, + 508, + 506, + 508, + 96, + 97, true, - "2", - "2" + "KG", + "KG" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 15441160910541481791, - 3518619573290839093, - 18446744073709551615, - 18446744073709551615, - 113, - 115, + 14652256560445338257, + 14335368261362934201, + null, + null, + 542, + 550, + 542, + 550, + 102, + 103, + true, + "entities", + "entities" + ], + [ + "sentence", + "proper", + 5617240156952377, + "TEXT", + "#/texts/37", + 1.0, + 1452563618681853259, + 7631253544865320080, + null, + null, + 552, + 659, + 552, + 659, + 104, + 131, + true, + "Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age ').", + "Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age ')." + ], + [ + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", + 1.0, + 14652256560445338257, + 14335368261362935731, + null, + null, + 567, + 575, + 567, + 575, + 107, + 108, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", + 1.0, + 8279380567349713241, + 12534042586919828830, + null, + null, + 581, + 594, + 581, + 594, + 110, + 111, + true, + "relationships", + "relationships" + ], + [ + "parenthesis", + "round brackets", + 5617240156952377, + "TEXT", + "#/texts/37", + 1.0, + 2293008940386739952, + 9841196227768892901, + null, + null, + 605, + 658, + 605, + 658, 113, + 130, + true, + "(' has-material-property ' or ' has-geological-age ')", + "(' has-material-property ' or ' has-geological-age ')" + ], + [ + "expression", + "word-concatenation", + 5617240156952377, + "TEXT", + "#/texts/37", + 1.0, + 4906245502857778203, + 10671415923670610924, + null, + null, + 608, + 629, + 608, + 629, 115, - 30, - 31, + 120, true, - "23", - "23" + "has-material-property", + "has-material-property" ], [ - "numval", - "ival", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 15441160910541481543, - 3518617976696906498, - 18446744073709551615, - 18446744073709551615, - 116, - 118, - 116, + 14638289344044595472, + 7719849777298214612, + null, + null, + 612, + 620, + 612, + 620, + 117, 118, - 32, - 33, true, - "08", - "08" + "material", + "material" ], [ - "link", - "url", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 8536069645534292969, - 16063604623463467342, - 18446744073709551615, - 18446744073709551615, - 35, - 87, - 35, - 87, - 8, - 25, + 14814125841683215315, + 5160242431901684608, + null, + null, + 621, + 629, + 621, + 629, + 119, + 120, true, - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", - "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + "property", + "property" ], [ - "link", - "url", - 18391264192891079539, + "expression", + "word-concatenation", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 594099663775968682, - 14698211805947073928, - 18446744073709551615, - 18446744073709551615, - 156, - 208, - 156, - 208, - 43, - 58, + 18200117896215154992, + 3778470233715427826, + null, + null, + 637, + 655, + 637, + 655, + 123, + 128, true, - "https://onlinelibrary.wiley.com/terms-and-conditions", - "https://onlinelibrary.wiley.com/terms-and-conditions" + "has-geological-age", + "has-geological-age" ], [ - "link", - "doi", - 18391264192891079539, + "term", + "single-term", + 5617240156952377, "TEXT", - "#/texts/190", + "#/texts/37", 1.0, - 1697220653346092555, - 8458710314769009562, - 18446744073709551615, - 18446744073709551615, - 67, - 87, - 67, - 87, - 18, - 25, + 12178341415895571749, + 3018078695746189830, + null, + null, + 652, + 655, + 652, + 655, + 127, + 128, true, - "doi/10.1002/ail2.20,", - "doi/10.1002/ail2.20," + "age", + "age" ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "proper", + 5617240156952377, + "TEXT", + "#/texts/37", 1.0, - 15441160910541482672, - 3558959168916500461, - 0, - 2, - 3, - 5, - 3, - 5, - 1, - 3, + 3192889723504224118, + 10764133440858685575, + null, + null, + 660, + 796, + 660, + 796, + 131, + 156, true, - "-1", - "-1" + "Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", + "Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents." ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", 1.0, - 15441160910541482673, - 3558959168967845780, - 0, - 3, - 3, - 5, - 3, - 5, - 1, - 3, + 8279380567349713241, + 12534042586919764573, + null, + null, + 666, + 679, + 666, + 679, + 133, + 134, true, - "-2", - "-2" + "relationships", + "relationships" ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", 1.0, - 15441160910541482674, - 3558959169084991311, - 0, - 4, - 3, - 5, - 3, - 5, - 1, - 3, + 15441160910541480204, + 6094061681036184736, + null, + null, + 687, + 689, + 687, + 689, + 136, + 137, true, - "-3", - "-3" + "KG", + "KG" ], [ - "numval", - "ival", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", 1.0, - 15441160910541482676, - 3558959170275494348, - 0, - 5, - 3, - 5, - 3, - 5, - 1, - 3, + 8106397496085150773, + 13219162774327540266, + null, + null, + 711, + 718, + 711, + 718, + 142, + 143, true, - "-5", - "-5" + "example", + "example" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", 1.0, - 389609625535995426, - 7990768689708475978, - 1, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 16946081241153289166, + 9864466924854841899, + null, + null, + 737, + 752, + 737, + 752, + 147, + 148, true, - "0.82", - "0.82" + "trustworthiness", + "trustworthiness" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", 1.0, - 389609625535995621, - 7990774618103388257, - 1, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 389609625697921894, + 4210996759022448259, + null, + null, + 758, + 762, + 758, + 762, + 150, + 151, true, - "0.96", - "0.96" + "fact", + "fact" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 5617240156952377, + "TEXT", + "#/texts/37", 1.0, - 389609625535995627, - 7990774615713296517, - 1, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 11304142420310002900, + 15341405723120219151, + null, + null, + 772, + 784, + 772, + 784, + 153, + 154, true, - "0.98", - "0.98" + "relationship", + "relationship" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625536250803, - 7990774066976884381, - 1, - 5, + 5803220549886302367, + 15412963923438297980, + null, + null, 0, - 4, + 79, 0, - 4, + 79, 0, - 4, + 15, true, - "1.00", - "1.00" + "In typical cases, we start from a collection of documents in different formats.", + "In typical cases, we start from a collection of documents in different formats." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995622, - 7990774618160743993, - 2, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 471372152363700254, + 3033927950885562857, + null, + null, + 3, + 16, + 3, + 16, + 1, + 3, true, - "0.93", - "0.93" + "typical cases", + "typical cases" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995627, - 7990774615712524481, - 2, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 2702984786539193186, + 6013382759463234661, + null, + null, + 34, + 44, + 34, + 44, + 8, + 9, true, - "0.98", - "0.98" + "collection", + "collection" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625536250803, - 7990774066976098009, - 2, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 6167933651658664291, + 2995705359694128803, + null, + null, + 48, + 57, + 48, + 57, + 10, + 11, true, - "1.00", - "1.00" + "documents", + "documents" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625536250803, - 7990774066976110280, - 2, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 600373524240865062, + 2399966783289403744, + null, + null, + 61, + 78, + 61, + 78, + 12, + 14, true, - "1.00", - "1.00" + "different formats", + "different formats" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995293, - 7990774599790700074, - 3, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 5852349916619262917, + 2271532486054143502, + null, + null, + 80, + 309, + 80, + 309, + 15, + 51, true, - "0.62", - "0.62" + "Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations.", + "Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995424, - 7990768689730984037, - 3, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 6167933651658664291, + 2995705359694038064, + null, + null, + 91, + 100, + 91, + 100, + 17, + 18, true, - "0.80", - "0.80" + "documents", + "documents" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "expression", + "word-concatenation", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995433, - 7990768688117646262, - 3, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 15378814484553003560, + 5153477104270258604, + null, + null, + 134, + 155, + 134, + 155, + 23, + 26, true, - "0.87", - "0.87" + "machine-interpretable", + "machine-interpretable" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995623, - 7990774617730131452, - 3, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 8106464587473865376, + 5165507430961721845, + null, + null, + 134, + 141, + 134, + 141, + 23, + 24, true, - "0.94", - "0.94" + "machine", + "machine" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995492, - 7990768692352137559, - 4, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 13939400362102325029, + 8086960164932752393, + null, + null, + 142, + 173, + 142, + 173, + 25, + 27, true, - "0.73", - "0.73" + "interpretable formatssuchasJSON", + "interpretable formatssuchasJSON" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "expression", + "wtoken-concatenation", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995616, - 7990774618481181961, - 4, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 17425405063522726763, + 3711175375270954936, + null, + null, + 156, + 210, + 156, + 210, + 26, + 35, true, - "0.91", - "0.91" + "formatssuchasJSON,XML,orHTML.However,inthevastmajority", + "formatssuchasJSON,XML,orHTML.However,inthevastmajority" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995623, - 7990774617741217753, - 4, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 12178341415895541463, + 2400214426273604388, + null, + null, + 174, + 177, + 174, + 177, + 28, + 29, true, - "0.94", - "0.94" + "XML", + "XML" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995626, - 7990774612563908250, - 4, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 16381206566158445487, + 7872018268021266315, + null, + null, + 178, + 184, + 178, + 184, + 30, + 31, true, - "0.97", - "0.97" + "orHTML", + "orHTML" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995426, - 7990768689764177354, - 5, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 8106351289640792048, + 5519525843581347965, + null, + null, + 185, + 192, + 185, + 192, + 32, + 33, true, - "0.82", - "0.82" + "However", + "However" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995623, - 7990774617746212517, - 5, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 2743961060608932072, + 3491654727014388637, + null, + null, + 193, + 210, + 193, + 210, + 34, + 35, true, - "0.94", - "0.94" + "inthevastmajority", + "inthevastmajority" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995626, - 7990774612589838230, - 5, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 329104161511786824, + 7360980031546797901, + null, + null, + 214, + 219, + 214, + 219, + 36, + 37, true, - "0.97", - "0.97" + "cases", + "cases" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995627, - 7990774616182657591, - 5, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 2100740202135991641, + 5436840372240293767, + null, + null, + 256, + 277, + 256, + 277, + 44, + 46, true, - "0.98", - "0.98" + "proprietary documents", + "proprietary documents" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "enum-term-mark-3", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995426, - 7990768689764403839, - 6, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 12289843658171515644, + 3893311679547523383, + null, + null, + 281, + 308, + 281, + 308, + 47, + 50, true, - "0.82", - "0.82" + "companies and organizations", + "companies and organizations" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995617, - 7990774618567229989, - 6, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 5947879506556567994, + 12367501146346618724, + null, + null, + 281, + 290, + 281, + 290, + 47, + 48, true, - "0.92", - "0.92" + "companies", + "companies" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995620, - 7990774618125993935, - 6, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 15694895771625575831, + 5124557589675872289, + null, + null, + 295, + 308, + 295, + 308, + 49, + 50, true, - "0.95", - "0.95" + "organizations", + "organizations" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995626, - 7990774612590090226, - 6, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 18004703537270342816, + 17398565441782732462, + null, + null, + 310, + 374, + 310, + 374, + 51, + 62, true, - "0.97", - "0.97" + "The latter are very often scanned or programmatic PDF documents.", + "The latter are very often scanned or programmatic PDF documents." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995494, - 7990768689217789732, - 7, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 1482873404926828774, + 1056012052691956269, + null, + null, + 347, + 373, + 347, + 373, + 58, + 61, true, - "0.75", - "0.75" + "programmatic PDF documents", + "programmatic PDF documents" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995617, - 7990774619359159209, - 7, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 18263128607664038326, + 17664748450447018207, + null, + null, + 375, + 458, + 375, + 458, + 62, + 78, true, - "0.92", - "0.92" + "Using the CCS, 1 these types of documents are converted into structured JSON files.", + "Using the CCS, 1 these types of documents are converted into structured JSON files." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995621, - 7990774618108893234, - 7, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 12178341415896221596, + 2400025488731380794, + null, + null, + 385, + 388, + 385, + 388, + 64, + 65, true, - "0.96", - "0.96" + "CCS", + "CCS" ], [ "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "ival", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995626, - 7990774612570894765, - 7, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 17767354399704235161, + 7431448323123128102, + null, + null, + 390, + 391, + 390, + 391, + 66, + 67, true, - "0.97", - "0.97" + "1", + "1" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995435, - 7990774626011945031, - 8, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 329104159243796903, + 8933277972599288721, + null, + null, + 398, + 403, + 398, + 403, + 68, + 69, true, - "0.89", - "0.89" + "types", + "types" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995621, - 7990774618110730915, - 8, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 6167933651658664291, + 2995705359694164349, + null, + null, + 407, + 416, + 407, + 416, + 70, + 71, true, - "0.96", - "0.96" + "documents", + "documents" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995626, - 7990774612562839849, - 8, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 7691191751134078884, + 8559094095047305408, + null, + null, + 436, + 457, + 436, + 457, + 74, + 77, true, - "0.97", - "0.97" + "structured JSON files", + "structured JSON files" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995627, - 7990774616172489304, - 8, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 4430064159297423066, + 1342061296626575350, + null, + null, + 459, + 567, + 459, + 567, + 78, + 103, true, - "0.98", - "0.98" + "Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body.", + "Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body." ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995429, - 7990774613602439211, - 9, - 2, - 0, - 4, - 0, - 4, - 0, - 4, + 2015984486169108115, + 14466633496919987426, + null, + null, + 473, + 484, + 473, + 484, + 80, + 82, true, - "0.83", - "0.83" + "easy access", + "easy access" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "expression", + "word-concatenation", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995617, - 7990774619353439571, - 9, - 3, - 0, - 4, - 0, - 4, - 0, - 4, + 6182928126338688945, + 18137263452770401078, + null, + null, + 492, + 501, + 492, + 501, + 84, + 87, true, - "0.92", - "0.92" + "meta-data", + "meta-data" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995620, - 7990774618123099565, - 9, - 4, - 0, - 4, - 0, - 4, - 0, - 4, + 389609625618237230, + 5376711039722033744, + null, + null, + 492, + 496, + 492, + 496, + 84, + 85, true, - "0.95", - "0.95" + "meta", + "meta" ], [ - "numval", - "fval", - 12469893451248582632, - "TABLE", - "#/tables/0", + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", 1.0, - 389609625535995621, - 7990774618110462820, - 9, - 5, - 0, - 4, - 0, - 4, - 0, - 4, + 389609625696431489, + 3792901633916076572, + null, + null, + 497, + 501, + 497, + 501, + 86, + 87, true, - "0.96", - "0.96" - ] - ], - "headers": [ - "type", - "subtype", - "subj_hash", - "subj_name", - "subj_path", - "conf", - "hash", - "ihash", - "coor_i", - "coor_j", - "char_i", - "char_j", - "ctok_i", - "ctok_j", - "wtok_i", - "wtok_j", - "wtok-match", - "name", - "original" - ] - }, - "meta": [ - { - "$ref": "#/page-headers/0" - }, - { - "$ref": "#/page-headers/1" - }, - { - "$ref": "#/page-headers/2" - }, - { - "$ref": "#/page-headers/3" - }, - { - "$ref": "#/footnotes/0" - }, - { - "$ref": "#/footnotes/1" - }, - { - "$ref": "#/page-footers/0" - }, - { - "$ref": "#/page-footers/1" - }, - { - "$ref": "#/page-headers/4" - }, - { - "$ref": "#/page-headers/5" - }, - { - "$ref": "#/figures/0/captions/0" - }, - { - "$ref": "#/page-headers/6" - }, - { - "$ref": "#/page-headers/7" - }, - { - "$ref": "#/page-headers/8" - }, - { - "$ref": "#/page-headers/9" - }, - { - "$ref": "#/page-headers/10" - }, - { - "$ref": "#/page-headers/11" - }, - { - "$ref": "#/figures/2/captions/0" - }, - { - "$ref": "#/page-headers/12" - }, - { - "$ref": "#/figures/3/captions/0" - }, - { - "$ref": "#/page-headers/13" - }, - { - "$ref": "#/page-headers/14" - }, - { - "$ref": "#/figures/5/captions/0" - }, - { - "$ref": "#/page-headers/15" - }, - { - "$ref": "#/figures/6/captions/0" - }, - { - "$ref": "#/page-headers/16" - }, - { - "$ref": "#/tables/0/captions/0" - }, - { - "$ref": "#/page-headers/17" - }, - { - "$ref": "#/page-headers/18" - } - ], - "model-application": { - "message": "success", - "success": true - }, - "other": [], - "page-dimensions": [ - { - "height": 782.3619995117188, - "page": 1, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 2, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 3, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 4, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 5, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 6, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 7, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 8, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 9, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 10, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 11, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 12, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 13, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 14, - "width": 595.2760009765625 - }, - { - "height": 782.3619995117188, - "page": 15, - "width": 595.2760009765625 - } - ], - "page-elements": [ - { - "bbox": [ - 44.78739929199219, - 743.57568359375, - 131.78494262695312, - 750.7937622070312 - ], - "iref": "#/page-headers/0", - "name": "page-header", - "orig-order": 15, - "page": 1, - "span": [ - 0, - 28 + "data", + "data" ], - "sref": "#/page-elements/0", - "text-order": 0, - "type": "page-header" - }, - { - "bbox": [ - 146.3265380859375, - 744.093017578125, - 229.3131561279297, - 751.4437866210938 + [ + "parenthesis", + "round brackets", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 15394970067510817687, + 15962838465604485796, + null, + null, + 502, + 544, + 502, + 544, + 87, + 98, + true, + "(eg, title, abstract, references, authors)", + "(eg, title, abstract, references, authors)" ], - "iref": "#/page-headers/1", - "name": "page-header", - "orig-order": 16, - "page": 1, - "span": [ - 0, - 26 + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 15441160910541487324, + 7422793804087321065, + null, + null, + 503, + 505, + 503, + 505, + 88, + 89, + true, + "eg", + "eg" ], - "sref": "#/page-elements/1", - "text-order": 1, - "type": "page-header" - }, - { - "bbox": [ - 243.7840576171875, - 743.953369140625, - 332.99346923828125, - 751.3480224609375 + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 329104159220026466, + 8927506052944868849, + null, + null, + 507, + 512, + 507, + 512, + 90, + 91, + true, + "title", + "title" ], - "iref": "#/page-headers/2", - "name": "page-header", - "orig-order": 17, - "page": 1, - "span": [ - 0, - 27 + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 15984565858548749625, + 16963224979092543907, + null, + null, + 524, + 534, + 524, + 534, + 94, + 95, + true, + "references", + "references" ], - "sref": "#/page-elements/2", - "text-order": 2, - "type": "page-header" - }, - { - "bbox": [ - 44.6877326965332, - 730.7138671875, - 106.1191635131836, - 737.30078125 + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 8106397759446161562, + 10642848847630119927, + null, + null, + 536, + 543, + 536, + 543, + 96, + 97, + true, + "authors", + "authors" ], - "iref": "#/page-headers/3", - "name": "page-header", + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 15559940474156832047, + 9774609109928111702, + null, + null, + 553, + 566, + 553, + 566, + 100, + 102, + true, + "document body", + "document body" + ], + [ + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 1486767917779853449, + 5612445825939840029, + null, + null, + 568, + 725, + 568, + 725, + 103, + 135, + true, + "The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions.", + "The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions." + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 16381206590630461421, + 7979888348543905422, + null, + null, + 572, + 578, + 572, + 578, + 104, + 105, + true, + "latter", + "latter" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 6165459422936662913, + 156978013262325260, + null, + null, + 596, + 605, + 596, + 605, + 108, + 109, + true, + "subtitles", + "subtitles" + ], + [ + "parenthesis", + "round brackets", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 16942421772084851866, + 558193540038999230, + null, + null, + 606, + 625, + 606, + 625, + 109, + 114, + true, + "(of various levels)", + "(of various levels)" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 7559223825341723371, + 4627385075759212737, + null, + null, + 610, + 624, + 610, + 624, + 111, + 113, + true, + "various levels", + "various levels" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 13968965538538956038, + 3970349775283182601, + null, + null, + 627, + 637, + 627, + 637, + 115, + 116, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 329104161597245179, + 7405248900434860581, + null, + null, + 639, + 644, + 639, + 644, + 117, + 118, + true, + "lists", + "lists" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 16381206513098478539, + 17467059885496981342, + null, + null, + 646, + 652, + 646, + 652, + 119, + 120, + true, + "tables", + "tables" + ], + [ + "parenthesis", + "round brackets", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 9856539956643571945, + 16934970679196867236, + null, + null, + 653, + 694, + 653, + 694, + 120, + 128, + true, + "(with internal row and column structures)", + "(with internal row and column structures)" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 724989623191821162, + 348519433085549433, + null, + null, + 659, + 671, + 659, + 671, + 122, + 124, + true, + "internal row", + "internal row" + ], + [ + "term", + "enum-term-mark-2", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 9624499087057569438, + 1557688156493519288, + null, + null, + 668, + 682, + 668, + 682, + 123, + 126, + true, + "row and column", + "row and column" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 298685439673823473, + 14473962335563688005, + null, + null, + 676, + 693, + 676, + 693, + 125, + 127, + true, + "column structures", + "column structures" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 8106397480533647371, + 7988334766998510018, + null, + null, + 696, + 703, + 696, + 703, + 129, + 130, + true, + "figures", + "figures" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 14652289689770638970, + 15793206492240518081, + null, + null, + 716, + 724, + 716, + 724, + 133, + 134, + true, + "captions", + "captions" + ], + [ + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 14033738409479099128, + 11129512477137196725, + null, + null, + 726, + 908, + 726, + 908, + 135, + 182, + true, + "O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation.", + "O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation." + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12178341415895653569, + 2400012968392811663, + null, + null, + 742, + 745, + 742, + 745, + 142, + 144, + true, + "r p", + "r p" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 329104161784480753, + 7493450073534230471, + null, + null, + 754, + 759, + 754, + 759, + 147, + 150, + true, + "r e s", + "r e s" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 16381206513088282843, + 17468687230071587166, + null, + null, + 762, + 768, + 762, + 768, + 151, + 153, + true, + "ti nas", + "ti nas" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 8106475359937212610, + 17526369764917632585, + null, + null, + 771, + 778, + 771, + 778, + 154, + 158, + true, + "r u c t", + "r u c t" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 329104161784480766, + 7493450073407919488, + null, + null, + 781, + 786, + 781, + 786, + 159, + 162, + true, + "r e d", + "r e d" + ], + [ + "expression", + "wtoken-concatenation", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12178341415895464512, + 2400205165144462589, + null, + null, + 785, + 788, + 785, + 788, + 161, + 164, + true, + "d,m", + "d,m" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12178341415895584463, + 2400183328813825594, + null, + null, + 791, + 794, + 791, + 794, + 165, + 167, + true, + "c h", + "c h" + ], + [ + "expression", + "wtoken-concatenation", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 17917268842395766052, + 17602957436319151954, + null, + null, + 801, + 868, + 801, + 868, + 170, + 175, + true, + "processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely", + "processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12332937572504082233, + 18086575631403056957, + null, + null, + 801, + 818, + 801, + 818, + 170, + 171, + true, + "processableformat", + "processableformat" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 5021804221286126333, + 9610939333420170753, + null, + null, + 819, + 861, + 819, + 861, + 172, + 173, + true, + "theKGiscreatedbyapplyingthreedistincttasks", + "theKGiscreatedbyapplyingthreedistincttasks" + ], + [ + "term", + "enum-term-mark-2", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 346813643429331050, + 431472632380928589, + null, + null, + 869, + 907, + 869, + 907, + 175, + 181, + true, + "extraction, annotation,and aggregation", + "extraction, annotation,and aggregation" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 5303544497514782120, + 15576269407827001608, + null, + null, + 869, + 879, + 869, + 879, + 175, + 176, + true, + "extraction", + "extraction" + ], + [ + "expression", + "wtoken-concatenation", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 6406713117350634128, + 3855167623003760197, + null, + null, + 881, + 895, + 881, + 895, + 177, + 180, + true, + "annotation,and", + "annotation,and" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 15359807916847495711, + 11787112639825647198, + null, + null, + 881, + 891, + 881, + 891, + 177, + 178, + true, + "annotation", + "annotation" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 844664518895955636, + 15595830566546024366, + null, + null, + 896, + 907, + 896, + 907, + 180, + 181, + true, + "aggregation", + "aggregation" + ], + [ + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12172348619957438677, + 66135446863509608, + null, + null, + 909, + 1012, + 909, + 1012, + 182, + 200, + true, + "The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG).", + "The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG)." + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 15397570689800643360, + 728258568208254432, + null, + null, + 913, + 934, + 913, + 934, + 183, + 185, + true, + "inherent dependencies", + "inherent dependencies" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 329104159214088329, + 8900803654811599827, + null, + null, + 955, + 960, + 955, + 960, + 188, + 189, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 3407118261441380378, + 14310748118874608151, + null, + null, + 983, + 1005, + 983, + 1005, + 193, + 196, + true, + "directed acyclic graph", + "directed acyclic graph" + ], + [ + "parenthesis", + "round brackets", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 329104053183779089, + 6177701307152006939, + null, + null, + 1006, + 1011, + 1006, + 1011, + 196, + 199, + true, + "(DAG)", + "(DAG)" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12178341415896112046, + 2400051137088161641, + null, + null, + 1007, + 1010, + 1007, + 1010, + 197, + 198, + true, + "DAG", + "DAG" + ], + [ + "sentence", + "proper", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 12651939560234325286, + 7750091780475029203, + null, + null, + 1013, + 1141, + 1013, + 1141, + 200, + 216, + true, + "We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", + "We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task." + ], + [ + "expression", + "wtoken-concatenation", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 11781935617291778240, + 7483730464872502560, + null, + null, + 1016, + 1111, + 1016, + 1111, + 201, + 209, + true, + "willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss", + "willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 3936626460029664092, + 7321945792584734830, + null, + null, + 1016, + 1052, + 1016, + 1052, + 201, + 202, + true, + "willrefertothisDAGoftasksasadataflow", + "willrefertothisDAGoftasksasadataflow" + ], + [ + "parenthesis", + "round brackets", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 389609625544714259, + 3798349935221358481, + null, + null, + 1052, + 1056, + 1052, + 1056, + 202, + 205, + true, + "(DF)", + "(DF)" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 15441160910541480770, + 7422789976862116916, + null, + null, + 1053, + 1055, + 1053, + 1055, + 203, + 204, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 4779709442550816015, + 12549819219947668408, + null, + null, + 1057, + 1074, + 1057, + 1074, + 206, + 207, + true, + "Inthenextsections", + "Inthenextsections" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 121364253490124098, + 7666483705959028757, + null, + null, + 1075, + 1111, + 1075, + 1111, + 208, + 209, + true, + "weestablishtheconceptofDFsanddiscuss", + "weestablishtheconceptofDFsanddiscuss" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 8106396517320706028, + 767766212633331407, + null, + null, + 1116, + 1123, + 1116, + 1123, + 210, + 211, + true, + "details", + "details" + ], + [ + "term", + "single-term", + 3276490574487379366, + "TEXT", + "#/texts/38", + 1.0, + 8106396823466748730, + 7340738159572681824, + null, + null, + 1133, + 1140, + 1133, + 1140, + 213, + 215, + true, + "DF task", + "DF task" + ], + [ + "numval", + "fval", + 3367451956962330174, + "TEXT", + "#/texts/39", + 1.0, + 12178341415896439119, + 1493266672212178244, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "2.1", + "2.1" + ], + [ + "sentence", + "improper", + 3367451956962330174, + "TEXT", + "#/texts/39", + 1.0, + 8611251901308287420, + 7694453792543019958, + null, + null, + 4, + 14, + 4, + 14, + 3, + 6, + true, + "| DF tasks", + "| DF tasks" + ], + [ + "term", + "single-term", + 3367451956962330174, + "TEXT", + "#/texts/39", + 1.0, + 14650437071608036927, + 6203129036654967457, + null, + null, + 6, + 14, + 6, + 14, + 4, + 6, + true, + "DF tasks", + "DF tasks" + ], + [ + "sentence", + "proper", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 18361587473539395888, + 13758431587446230571, + null, + null, + 0, + 157, + 0, + 157, + 0, + 32, + true, + "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG.", + "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG." + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 16381206514091025767, + 10250804564386933303, + null, + null, + 3, + 9, + 3, + 9, + 1, + 2, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 17767354399704235161, + 13327421909992595494, + null, + null, + 10, + 11, + 10, + 11, + 2, + 3, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 14108247744263052140, + 4614352863788930341, + null, + null, + 25, + 35, + 25, + 35, + 7, + 9, + true, + "minimal DF", + "minimal DF" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 329104159214088329, + 13912131276346951656, + null, + null, + 64, + 69, + 64, + 69, + 16, + 17, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 329104161571401725, + 14203603284583761528, + null, + null, + 95, + 100, + 95, + 100, + 21, + 22, + true, + "order", + "order" + ], + [ + "term", + "enum-term-mark-3", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 13335488353876392384, + 2655829317417497504, + null, + null, + 113, + 139, + 113, + 139, + 24, + 27, + true, + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 14652256560445338257, + 3592483441909519898, + null, + null, + 113, + 121, + 113, + 121, + 24, + 25, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 8279380567349713241, + 9458076378316233083, + null, + null, + 126, + 139, + 126, + 139, + 26, + 27, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 2511167939540360071, + 5009883108334261823, + null, + null, + 146, + 156, + 146, + 156, + 29, + 31, + true, + "generic KG", + "generic KG" + ], + [ + "sentence", + "proper", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 14266973449968823144, + 5033210687475997626, + null, + null, + 158, + 232, + 158, + 232, + 32, + 42, + true, + "We will use Figure1toillustratethepurposeandimplementationof each DF task.", + "We will use Figure1toillustratethepurposeandimplementationof each DF task." + ], + [ + "expression", + "wtoken-concatenation", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 17090092756343575965, + 11744760582045783073, + null, + null, + 170, + 218, + 170, + 218, + 35, + 38, + true, + "Figure1toillustratethepurposeandimplementationof", + "Figure1toillustratethepurposeandimplementationof" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 16381206514091025767, + 10250804564387005825, + null, + null, + 170, + 176, + 170, + 176, + 35, + 36, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 17767354399704235161, + 13327421909992715764, + null, + null, + 176, + 177, + 176, + 177, + 36, + 37, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 5509744459704235873, + "TEXT", + "#/texts/40", + 1.0, + 8106396823466748730, + 7802822363042755988, + null, + null, + 224, + 231, + 224, + 231, + 39, + 41, + true, + "DF task", + "DF task" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/41", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 4361549176688508574, + "TEXT", + "#/texts/42", + 1.0, + 17767354399704235156, + 7238925036885539838, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "4", + "4" + ], + [ + "sentence", + "improper", + 4361549176688508574, + "TEXT", + "#/texts/42", + 1.0, + 15441160910541485670, + 7918927380167181789, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], + [ + "numval", + "ival", + 4361549176688508574, + "TEXT", + "#/texts/42", + 1.0, + 15441160910541481979, + 7918922223876958481, + null, + null, + 3, + 5, + 3, + 5, + 2, + 3, + true, + "15", + "15" + ], + [ + "sentence", + "improper", + 12374482891052873875, + "TEXT", + "#/texts/43", + 1.0, + 8758905122433574314, + 10402008902852922243, + null, + null, + 0, + 18, + 0, + 18, + 0, + 7, + true, + "2.1.1 | Extraction", + "2.1.1 | Extraction" + ], + [ + "expression", + "wtoken-concatenation", + 12374482891052873875, + "TEXT", + "#/texts/43", + 1.0, + 329104147711421761, + 15707288010084820862, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "2.1.1", + "2.1.1" + ], + [ + "numval", + "fval", + 12374482891052873875, + "TEXT", + "#/texts/43", + 1.0, + 12178341415896439119, + 1298001416237199126, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 12374482891052873875, + "TEXT", + "#/texts/43", + 1.0, + 17767354399704235161, + 4264503375288263632, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 12374482891052873875, + "TEXT", + "#/texts/43", + 1.0, + 6329470030377853550, + 13847055292419842236, + null, + null, + 6, + 18, + 6, + 18, + 5, + 7, + true, + "| Extraction", + "| Extraction" + ], + [ + "sentence", + "proper", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 15409610016137670632, + 15197855970598893502, + null, + null, + 0, + 135, + 0, + 135, + 0, + 29, + true, + "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents).", + "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents)." + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 5101579281631733460, + 3966842469618203217, + null, + null, + 6, + 21, + 6, + 21, + 2, + 4, + true, + "extraction task", + "extraction task" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 8165740181202876025, + 12959134491963005199, + null, + null, + 35, + 52, + 35, + 52, + 7, + 10, + true, + "new data entities", + "new data entities" + ], + [ + "parenthesis", + "round brackets", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 13286102413593957394, + 6381707955564535797, + null, + null, + 53, + 78, + 53, + 78, + 10, + 16, + true, + "(eg, document components)", + "(eg, document components)" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 15441160910541487324, + 3536104368677879456, + null, + null, + 54, + 56, + 54, + 56, + 11, + 12, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 17524405716142769441, + 8406307373037476042, + null, + null, + 58, + 77, + 58, + 77, + 13, + 15, + true, + "document components", + "document components" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 5306796263967471926, + 13469424225666590652, + null, + null, + 87, + 99, + 87, + 99, + 18, + 20, + true, + "original set", + "original set" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 15765380208127739160, + 18326515589191928219, + null, + null, + 103, + 118, + 103, + 118, + 21, + 23, + true, + "source entities", + "source entities" + ], + [ + "parenthesis", + "round brackets", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 10585762328342379081, + 2922498186515452860, + null, + null, + 119, + 134, + 119, + 134, + 23, + 28, + true, + "(eg, documents)", + "(eg, documents)" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 15441160910541487324, + 3536104368677884369, + null, + null, + 120, + 122, + 120, + 122, + 24, + 25, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 6167933651658664291, + 787322433359315506, + null, + null, + 124, + 133, + 124, + 133, + 26, + 27, + true, + "documents", + "documents" + ], + [ + "sentence", + "proper", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 12978591138826985854, + 5403545883930117735, + null, + null, + 136, + 261, + 136, + 261, + 29, + 50, + true, + "During this process, new links are created which connect these newly generated data entities to their original source entity.", + "During this process, new links are created which connect these newly generated data entities to their original source entity." + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 8106476000254393164, + 1225756195807708888, + null, + null, + 148, + 155, + 148, + 155, + 31, + 32, + true, + "process", + "process" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 6172031743812195918, + 18336408684850256697, + null, + null, + 157, + 166, + 157, + 166, + 33, + 35, + true, + "new links", + "new links" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 5594093096302267983, + 8802313533208580560, + null, + null, + 215, + 228, + 215, + 228, + 42, + 44, + true, + "data entities", + "data entities" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 17136238213570622776, + 10120757655484606397, + null, + null, + 238, + 260, + 238, + 260, + 46, + 49, + true, + "original source entity", + "original source entity" + ], + [ + "sentence", + "proper", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 14884458065406655707, + 12844102167211528377, + null, + null, + 262, + 403, + 262, + 403, + 50, + 74, + true, + "Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", + "Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files." + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 7471629211517394017, + 10357904862024863501, + null, + null, + 262, + 278, + 262, + 278, + 50, + 52, + true, + "Typical examples", + "Typical examples" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 16021145566749909698, + 8783219642454586604, + null, + null, + 282, + 303, + 282, + 303, + 53, + 56, + true, + "such extraction tasks", + "such extraction tasks" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 5303544497514782120, + 7449508868916247566, + null, + null, + 312, + 322, + 312, + 322, + 58, + 59, + true, + "extraction", + "extraction" + ], + [ + "term", + "enum-term-mark-3", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 12690021242452676680, + 6810659946202167494, + null, + null, + 326, + 367, + 326, + 367, + 60, + 68, + true, + "abstracts, paragraphs, tables, or figures", + "abstracts, paragraphs, tables, or figures" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 5950055304304346669, + 7040340631860090954, + null, + null, + 326, + 335, + 326, + 335, + 60, + 61, + true, + "abstracts", + "abstracts" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 13968965538538956038, + 12002401925983499058, + null, + null, + 337, + 347, + 337, + 347, + 62, + 63, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 16381206513098478539, + 18007320769283054809, + null, + null, + 349, + 355, + 349, + 355, + 64, + 65, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 8106397480533647371, + 6262926971414929226, + null, + null, + 360, + 367, + 360, + 367, + 67, + 68, + true, + "figures", + "figures" + ], + [ + "term", + "single-term", + 2755397864153233778, + "TEXT", + "#/texts/44", + 1.0, + 11975718842215856689, + 7368242614057501661, + null, + null, + 377, + 402, + 377, + 402, + 70, + 73, + true, + "structured document files", + "structured document files" + ], + [ + "sentence", + "proper", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 2147087418790148248, + 15000446724502032444, + null, + null, + 0, + 170, + 0, + 170, + 0, + 29, + true, + "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud.", + "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud." + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 15351504864593712035, + 2756443006928426392, + null, + null, + 7, + 24, + 7, + 24, + 2, + 4, + true, + "scalability point", + "scalability point" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 389609625619349298, + 9833507871259260213, + null, + null, + 28, + 32, + 28, + 32, + 5, + 6, + true, + "view", + "view" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 389609625631210899, + 9833297825584884817, + null, + null, + 39, + 43, + 39, + 43, + 8, + 9, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 8143730520203056904, + 2959281947983247053, + null, + null, + 141, + 153, + 141, + 153, + 23, + 24, + true, + "environments", + "environments" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 329104161517016668, + 10439779028398331162, + null, + null, + 164, + 169, + 164, + 169, + 27, + 28, + true, + "cloud", + "cloud" + ], + [ + "sentence", + "proper", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 463110093402807730, + 15879147521720349842, + null, + null, + 171, + 359, + 171, + 359, + 29, + 61, + true, + "We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database.", + "We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database." + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 14814034872218884114, + 6147860370160875462, + null, + null, + 192, + 200, + 192, + 200, + 33, + 34, + true, + "parallel", + "parallel" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 15765380208127739160, + 277110281299086050, + null, + null, + 210, + 225, + 210, + 225, + 36, + 38, + true, + "source entities", + "source entities" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 8498518363315513669, + 16461247402515495671, + null, + null, + 233, + 249, + 233, + 249, + 40, + 42, + true, + "backend database", + "backend database" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 2703018952916355661, + 12156539097906296251, + null, + null, + 271, + 281, + 271, + 281, + 46, + 47, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 2703018952916355661, + 12156539097906285939, + null, + null, + 304, + 314, + 304, + 314, + 51, + 52, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 8165740181202876025, + 16399806825987925636, + null, + null, + 318, + 335, + 318, + 335, + 53, + 56, + true, + "new data entities", + "new data entities" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 14650399832241044640, + 12452531445847951870, + null, + null, + 350, + 358, + 350, + 358, + 59, + 60, + true, + "database", + "database" + ], + [ + "sentence", + "proper", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 1890759309653672100, + 6265380164012186787, + null, + null, + 360, + 417, + 360, + 417, + 61, + 69, + true, + "Extraction tasks have no internal synchronization points.", + "Extraction tasks have no internal synchronization points." + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 14447454215787633762, + 11742892003972248650, + null, + null, + 360, + 376, + 360, + 376, + 61, + 63, + true, + "Extraction tasks", + "Extraction tasks" + ], + [ + "term", + "single-term", + 4698316471746130896, + "TEXT", + "#/texts/45", + 1.0, + 15507406252536266458, + 3423090220874343271, + null, + null, + 385, + 416, + 385, + 416, + 65, + 68, + true, + "internal synchronization points", + "internal synchronization points" + ], + [ + "sentence", + "proper", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 3181956257518905626, + 12206342658123130916, + null, + null, + 0, + 171, + 0, + 171, + 0, + 30, + true, + "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result.", + "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result." + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 10222723319085515006, + 12762428063935087370, + null, + null, + 0, + 22, + 0, + 22, + 0, + 3, + true, + "One particular benefit", + "One particular benefit" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 389609625631210899, + 8702509282366718713, + null, + null, + 31, + 35, + 31, + 35, + 5, + 6, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 1964765537092797933, + 15905881283685597246, + null, + null, + 51, + 67, + 51, + 67, + 10, + 12, + true, + "query capability", + "query capability" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 5877539623435777295, + 14123988044322330006, + null, + null, + 75, + 90, + 75, + 90, + 14, + 16, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 10500127719357050942, + 7860892982742338668, + null, + null, + 134, + 156, + 134, + 156, + 24, + 26, + true, + "provenance information", + "provenance information" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 16381206521509536706, + 1134399847748717084, + null, + null, + 164, + 170, + 164, + 170, + 28, + 29, + true, + "result", + "result" + ], + [ + "sentence", + "proper", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 6450838456884111499, + 591579186413275111, + null, + null, + 172, + 282, + 172, + 282, + 30, + 53, + true, + "For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", + "For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact." + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 8106397496085150773, + 4220253075084279441, + null, + null, + 176, + 183, + 176, + 183, + 31, + 32, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 389609625632179162, + 8702524581310998150, + null, + null, + 204, + 208, + 204, + 208, + 37, + 38, + true, + "user", + "user" + ], + [ + "term", + "enum-term-mark-3", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 14465129582137182361, + 8361227597967842953, + null, + null, + 225, + 255, + 225, + 255, + 41, + 47, + true, + "paragraphs, tables, or figures", + "paragraphs, tables, or figures" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 13968965538538956038, + 4949616636950973040, + null, + null, + 225, + 235, + 225, + 235, + 41, + 42, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 16381206513098478539, + 11251268092763423519, + null, + null, + 237, + 243, + 237, + 243, + 43, + 44, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 8106397480533647371, + 9599867079335670867, + null, + null, + 248, + 255, + 248, + 255, + 46, + 47, + true, + "figures", + "figures" + ], + [ + "term", + "single-term", + 11827267218358801841, + "TEXT", + "#/texts/46", + 1.0, + 5446369751014219582, + 4289580841675524190, + null, + null, + 269, + 281, + 269, + 281, + 50, + 52, + true, + "certain fact", + "certain fact" + ], + [ + "sentence", + "improper", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 12444247655523627494, + 11369889824975445759, + null, + null, + 0, + 18, + 0, + 18, + 0, + 7, + true, + "2.1.2 | Annotation", + "2.1.2 | Annotation" + ], + [ + "expression", + "wtoken-concatenation", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 329104147711421774, + 10145012391943880145, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "2.1.2", + "2.1.2" + ], + [ + "numval", + "fval", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 12178341415896439119, + 9338691878670130519, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 6297710299044869343, + "TEXT", + "#/texts/47", + 1.0, + 17767354399704235162, + 17230475508982970052, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16887173432250904127, + 7631275740120697636, + null, + null, + 0, + 125, + 0, + 125, + 0, + 22, + true, + "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity.", + "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity." + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4147688156856812386, + 11375319308790844386, + null, + null, + 7, + 22, + 7, + 22, + 2, + 4, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4064125330427640408, + 1392875787154483270, + null, + null, + 33, + 44, + 33, + 44, + 7, + 9, + true, + "NLU methods", + "NLU methods" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12159911606058366544, + 12317116291497839674, + null, + null, + 55, + 72, + 55, + 72, + 11, + 13, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8279380567349713241, + 3197450440257438778, + null, + null, + 83, + 96, + 83, + 96, + 15, + 16, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 3099738444514320422, + 11450082253858502881, + null, + null, + 106, + 124, + 106, + 124, + 18, + 21, + true, + "single data entity", + "single data entity" + ], + [ + "sentence", + "proper", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 10511705512724067221, + 8131498709835920590, + null, + null, + 126, + 255, + 126, + 255, + 22, + 51, + true, + "Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures.", + "Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures." + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 5594093096302267983, + 4253165684837357319, + null, + null, + 132, + 145, + 132, + 145, + 24, + 26, + true, + "data entities", + "data entities" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8106478562764653920, + 94832442363733992, + null, + null, + 168, + 175, + 168, + 175, + 32, + 33, + true, + "snippet", + "snippet" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 389609625631325904, + 2402759180687356962, + null, + null, + 179, + 183, + 179, + 183, + 34, + 35, + true, + "text", + "text" + ], + [ + "parenthesis", + "round brackets", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16141002319857532197, + 16721784361290394175, + null, + null, + 184, + 201, + 184, + 201, + 35, + 41, + true, + "(eg, a paragraph)", + "(eg, a paragraph)" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 15441160910541487324, + 8070554360546184492, + null, + null, + 185, + 187, + 185, + 187, + 36, + 37, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 6169141668427353082, + 2952751502941090235, + null, + null, + 191, + 200, + 191, + 200, + 39, + 40, + true, + "paragraph", + "paragraph" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 3570234228108234057, + 10514715831603435127, + null, + null, + 210, + 228, + 210, + 228, + 43, + 45, + true, + "complex structures", + "complex structures" + ], + [ + "term", + "enum-term-mark-3", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 18384137313945358770, + 11714307673095999951, + null, + null, + 237, + 254, + 237, + 254, + 47, + 50, + true, + "tables or figures", + "tables or figures" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206513098478539, + 4960744687131596426, + null, + null, + 237, + 243, + 237, + 243, + 47, + 48, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8106397480533647371, + 7525065665694091915, + null, + null, + 247, + 254, + 247, + 254, + 49, + 50, + true, + "figures", + "figures" + ], + [ + "sentence", + "proper", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 8398014702449778476, + 12904618777003590817, + null, + null, + 256, + 392, + 256, + 392, + 51, + 77, + true, + "The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus.", + "The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus." + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 6179391932551989543, + 16916864605364003434, + null, + null, + 260, + 269, + 260, + 269, + 52, + 54, + true, + "main goal", + "main goal" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4147688156856812386, + 11375319308790730633, + null, + null, + 277, + 292, + 277, + 292, + 56, + 58, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 447877196158192114, + 9731824282974847876, + null, + null, + 310, + 330, + 310, + 330, + 62, + 64, + true, + "relevant information", + "relevant information" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4106840074686891911, + 6843113354081867506, + null, + null, + 340, + 351, + 340, + 351, + 66, + 68, + true, + "data entity", + "data entity" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206521526353544, + 4862321863264196616, + null, + null, + 357, + 363, + 357, + 363, + 69, + 70, + true, + "regard", + "regard" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206568268873414, + 8285328794802270882, + null, + null, + 371, + 377, + 371, + 377, + 72, + 73, + true, + "domain", + "domain" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206562408205435, + 1670996501012150979, + null, + null, + 385, + 391, + 385, + 391, + 75, + 76, + true, + "corpus", + "corpus" + ], + [ + "sentence", + "proper", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 5036973358471188301, + 5113429010382725861, + null, + null, + 393, + 647, + 393, + 647, + 77, + 125, + true, + "Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", + "Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.)." + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 14770886840273751060, + 2461765204774504664, + null, + null, + 399, + 425, + 399, + 425, + 78, + 81, + true, + "different technical fields", + "different technical fields" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 14395582171383490407, + 17447243651093722639, + null, + null, + 434, + 455, + 434, + 455, + 82, + 84, + true, + "different annotations", + "different annotations" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 4147688156856812386, + 11375319308790832527, + null, + null, + 461, + 476, + 461, + 476, + 86, + 88, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12159911606058366544, + 12317116291497818179, + null, + null, + 498, + 515, + 498, + 515, + 92, + 94, + true, + "language entities", + "language entities" + ], + [ + "term", + "enum-term-mark-2", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 1860880111352291313, + 13970201774176510008, + null, + null, + 536, + 565, + 536, + 565, + 98, + 104, + true, + "material science, oil and gas", + "material science, oil and gas" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 10788814978233814896, + 4396386661032428573, + null, + null, + 536, + 552, + 536, + 552, + 98, + 100, + true, + "material science", + "material science" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12178341415895623363, + 12807178710519925531, + null, + null, + 554, + 557, + 554, + 557, + 101, + 102, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12178341415895464135, + 12807165220377806489, + null, + null, + 562, + 565, + 562, + 565, + 103, + 104, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 15408216039387112052, + 9368993532817081616, + null, + null, + 575, + 589, + 575, + 589, + 107, + 109, + true, + "basic entities", + "basic entities" + ], + [ + "parenthesis", + "round brackets", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 7045104231246392140, + 4304878346281955020, + null, + null, + 590, + 646, + 590, + 646, + 109, + 124, + true, + "(eg, noun phrases, abbreviations, unit and values, etc.)", + "(eg, noun phrases, abbreviations, unit and values, etc.)" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 15441160910541487324, + 8070554360545437401, + null, + null, + 591, + 593, + 591, + 593, + 110, + 111, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 14689069836122597249, + 3316427666339118054, + null, + null, + 595, + 607, + 595, + 607, + 112, + 114, + true, + "noun phrases", + "noun phrases" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 1348537160828121453, + 2554508032215814292, + null, + null, + 609, + 622, + 609, + 622, + 115, + 116, + true, + "abbreviations", + "abbreviations" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 389609625632190829, + 2402804839141248529, + null, + null, + 624, + 628, + 624, + 628, + 117, + 118, + true, + "unit", + "unit" + ], + [ + "term", + "single-term", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 16381206519529683092, + 8830939125119703029, + null, + null, + 633, + 639, + 633, + 639, + 119, + 120, + true, + "values", + "values" + ], + [ + "expression", + "common", + 7158837349769150986, + "TEXT", + "#/texts/48", + 1.0, + 12178341415895450733, + 12807202931780954464, + null, + null, + 641, + 645, + 641, + 645, + 121, + 123, + true, + "etc", + "etc." + ], + [ + "sentence", + "proper", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 7779111357672011576, + 17653885300415270377, + null, + null, + 0, + 169, + 0, + 167, + 0, + 27, + true, + "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks.", + "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks." + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 9847720278307775508, + 16897299718511055664, + null, + null, + 7, + 28, + 7, + 28, + 2, + 4, + true, + "technical perspective", + "technical perspective" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 12159911606058366544, + 9840970767482962073, + null, + null, + 34, + 51, + 34, + 51, + 6, + 8, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 18281631562721121276, + 4629272614700109136, + null, + null, + 85, + 105, + 85, + 105, + 13, + 16, + true, + "multiple NLU methods", + "multiple NLU methods" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6765267146872757605, + 14403665241565123375, + null, + null, + 120, + 147, + 120, + 147, + 19, + 22, + true, + "complex regular expressions", + "complex regular expressions" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 3706863384497465111, + 8221205291804750590, + null, + null, + 155, + 168, + 153, + 166, + 24, + 26, + true, + "LSTM networks", + "LSTM networks" + ], + [ + "numval", + "fval", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 12178341415896310341, + 6520357412536397527, + null, + null, + 170, + 173, + 168, + 171, + 27, + 30, + true, + "5,6", + "5,6" + ], + [ + "sentence", + "proper", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6580876879929044039, + 6602773183063942475, + null, + null, + 174, + 300, + 172, + 296, + 30, + 58, + true, + "We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models.", + "We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models." + ], + [ + "expression", + "word-concatenation", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 5044385734724420019, + 7407814039821929674, + null, + null, + 184, + 200, + 182, + 198, + 32, + 39, + true, + "state-of-the-art", + "state-of-the-art" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 329104161640023790, + 10278330567538531329, + null, + null, + 184, + 189, + 182, + 187, + 32, + 33, + true, + "state", + "state" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 14165185383975822266, + 3705189882831744660, + null, + null, + 197, + 213, + 195, + 211, + 38, + 41, + true, + "art NLU toolkits", + "art NLU toolkits" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 329104162342367500, + 10244334105852443240, + null, + null, + 222, + 227, + 220, + 225, + 43, + 44, + true, + "Spacy", + "Spacy" + ], + [ + "numval", + "ival", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17767354399704235159, + 17919867067928731763, + null, + null, + 228, + 229, + 226, + 227, + 44, + 45, + true, + "7", + "7" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 389609625695864805, + 14626057431240642030, + null, + null, + 233, + 237, + 231, + 235, + 46, + 47, + true, + "NLTK", + "NLTK" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 16381206559341571450, + 1447158595331306969, + null, + null, + 261, + 267, + 257, + 263, + 52, + 53, + true, + "custom", + "custom" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 8774094861370452817, + 2952593523093736834, + null, + null, + 274, + 299, + 270, + 295, + 54, + 57, + true, + "entity recognition models", + "entity recognition models" + ], + [ + "sentence", + "proper", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17212207186390634612, + 10030533856655835592, + null, + null, + 301, + 403, + 297, + 399, + 58, + 75, + true, + "A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper.", + "A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper." + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 3850923311242910952, + 11129645616881861710, + null, + null, + 303, + 325, + 299, + 321, + 59, + 61, + true, + "detailed investigation", + "detailed investigation" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17250040061445046934, + 11791038398155808201, + null, + null, + 335, + 349, + 331, + 345, + 63, + 65, + true, + "NLU annotators", + "NLU annotators" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 329104161784846775, + 10257930626923248048, + null, + null, + 383, + 388, + 379, + 384, + 70, + 71, + true, + "scope", + "scope" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 329104161668023890, + 10258894979066678110, + null, + null, + 397, + 402, + 393, + 398, + 73, + 74, + true, + "paper", + "paper" + ], + [ + "sentence", + "proper", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 9979157195255435816, + 5766649868617218566, + null, + null, + 404, + 542, + 400, + 538, + 75, + 104, + true, + "However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", + "However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model." + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 16381206514091025767, + 251249362636336734, + null, + null, + 416, + 422, + 412, + 418, + 78, + 79, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 17767354399704235162, + 17919867064012061628, + null, + null, + 423, + 424, + 419, + 420, + 79, + 80, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 13127417780371024365, + 3643698460906758134, + null, + null, + 438, + 453, + 434, + 449, + 84, + 86, + true, + "different types", + "different types" + ], + [ + "parenthesis", + "round brackets", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6251248012671019556, + 8400220771602544974, + null, + null, + 463, + 475, + 459, + 471, + 88, + 91, + true, + "(geological)", + "(geological)" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 14652256560445338257, + 5505418234470722173, + null, + null, + 476, + 484, + 472, + 480, + 91, + 92, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6169141668427353082, + 8227784820734106454, + null, + null, + 496, + 505, + 492, + 501, + 95, + 96, + true, + "paragraph", + "paragraph" + ], + [ + "term", + "enum-term-mark-2", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 836843906912689304, + 5336535317156291017, + null, + null, + 513, + 541, + 509, + 537, + 98, + 103, + true, + "oil and gas annotation model", + "oil and gas annotation model" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 12178341415895623363, + 6519272468740598962, + null, + null, + 513, + 516, + 509, + 512, + 98, + 99, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 1150871476689677866, + "TEXT", + "#/texts/49", + 1.0, + 6054420812037878801, + 17086623133308882247, + null, + null, + 521, + 541, + 517, + 537, + 100, + 103, + true, + "gas annotation model", + "gas annotation model" + ], + [ + "sentence", + "proper", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15219000500037869602, + 8856002696045104109, + null, + null, + 0, + 130, + 0, + 130, + 0, + 26, + true, + "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend.", + "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend." + ], + [ + "numval", + "ival", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 17767354399704235161, + 8171001275372472332, + null, + null, + 11, + 12, + 11, + 12, + 2, + 3, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8106397495779651824, + 14812410445927163850, + null, + null, + 30, + 37, + 30, + 37, + 8, + 9, + true, + "excerpt", + "excerpt" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 1037258523789473353, + 1591720788223851008, + null, + null, + 49, + 60, + 49, + 60, + 12, + 13, + true, + "annotations", + "annotations" + ], + [ + "parenthesis", + "round brackets", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 23744483499852859, + 1477981163033721681, + null, + null, + 61, + 103, + 61, + 103, + 13, + 20, + true, + "(both language entities and relationships)", + "(both language entities and relationships)" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 12159911606058366544, + 17237419887761715230, + null, + null, + 67, + 84, + 67, + 84, + 15, + 17, + true, + "language entities", + "language entities" + ], + [ + "term", + "enum-term-mark-3", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 13335488353876392384, + 17546248817268837810, + null, + null, + 76, + 102, + 76, + 102, + 16, + 19, + true, + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403321989315, + null, + null, + 89, + 102, + 89, + 102, + 18, + 19, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8106396937135332179, + 5430416277392822917, + null, + null, + 122, + 129, + 122, + 129, + 24, + 25, + true, + "backend", + "backend" + ], + [ + "sentence", + "proper", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 16069762016341043397, + 4644962557882819656, + null, + null, + 131, + 233, + 131, + 233, + 26, + 44, + true, + "It is noteworthy here that relationships are stored as (weighted) links between two entity references.", + "It is noteworthy here that relationships are stored as (weighted) links between two entity references." + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322018040, + null, + null, + 158, + 171, + 158, + 171, + 31, + 32, + true, + "relationships", + "relationships" + ], + [ + "parenthesis", + "round brackets", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 4962669361213862895, + 5695197211497118759, + null, + null, + 186, + 196, + 186, + 196, + 35, + 38, + true, + "(weighted)", + "(weighted)" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 3523694329852808542, + 11676604379961027572, + null, + null, + 215, + 232, + 215, + 232, + 41, + 43, + true, + "entity references", + "entity references" + ], + [ + "sentence", + "improper", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 17767354399704232711, + 8171001056559370747, + null, + null, + 234, + 236, + 234, + 235, + 44, + 45, + true, + "\u00a7", + "\u00a7" + ], + [ + "sentence", + "proper", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 9876246604275471522, + 3163098707953307694, + null, + null, + 237, + 394, + 236, + 393, + 45, + 70, + true, + "The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG.", + "The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG." + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 329104159157898666, + 16038035701522583509, + null, + null, + 241, + 246, + 240, + 245, + 46, + 47, + true, + "usage", + "usage" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15984565858548749625, + 7081370485737015030, + null, + null, + 250, + 260, + 249, + 259, + 48, + 49, + true, + "references", + "references" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 18000510673442216122, + 13486560897367143376, + null, + null, + 269, + 285, + 268, + 284, + 50, + 52, + true, + "data duplication", + "data duplication" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322071374, + null, + null, + 324, + 337, + 323, + 336, + 58, + 59, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 14652256560445338257, + 4854961473690419140, + null, + null, + 375, + 383, + 374, + 382, + 65, + 66, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15441160910541480204, + 9487537061622411973, + null, + null, + 391, + 393, + 390, + 392, + 68, + 69, + true, + "KG", + "KG" + ], + [ + "sentence", + "proper", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 16887832449551411027, + 18045699081531244891, + null, + null, + 395, + 580, + 394, + 579, + 70, + 107, + true, + "The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", + "The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4)." + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 16381206590630461421, + 183862245083634869, + null, + null, + 399, + 405, + 398, + 404, + 71, + 72, + true, + "latter", + "latter" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 844664518895955636, + 3549384269706196539, + null, + null, + 421, + 432, + 420, + 431, + 74, + 75, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322029163, + null, + null, + 440, + 453, + 439, + 452, + 77, + 78, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15983101815597714776, + 14869920223655256325, + null, + null, + 478, + 490, + 477, + 489, + 82, + 84, + true, + "new entities", + "new entities" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15441160910541480204, + 9487537061622371109, + null, + null, + 517, + 519, + 516, + 518, + 90, + 91, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 329104161571401725, + 16112186615218301516, + null, + null, + 523, + 528, + 522, + 527, + 92, + 93, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8279380567349713241, + 14280654403322027617, + null, + null, + 546, + 559, + 545, + 558, + 96, + 97, + true, + "relationships", + "relationships" + ], + [ + "parenthesis", + "round brackets", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 15147261052050380573, + 340133695651638722, + null, + null, + 560, + 579, + 559, + 578, + 97, + 106, + true, + "(see section 2.1.4)", + "(see section 2.1.4)" + ], + [ + "term", + "single-term", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 8106478708629288965, + 3196712847993308076, + null, + null, + 565, + 572, + 564, + 571, + 99, + 100, + true, + "section", + "section" + ], + [ + "numval", + "fval", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 12178341415896439119, + 15419364153911617129, + null, + null, + 573, + 576, + 572, + 575, + 100, + 103, + true, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 5163702913945903725, + "TEXT", + "#/texts/50", + 1.0, + 17767354399704235156, + 8171001275288926016, + null, + null, + 577, + 578, + 576, + 577, + 104, + 105, + true, + "4", + "4" + ], + [ + "sentence", + "proper", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 11152699847125264436, + 12077505865209116602, + null, + null, + 0, + 111, + 0, + 111, + 0, + 18, + true, + "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract.", + "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract." + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 16381206531186882505, + 282429509959017085, + null, + null, + 0, + 6, + 0, + 6, + 0, + 1, + true, + "FIGURE", + "FIGURE" + ], + [ + "numval", + "ival", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 17767354399704235162, + 11171804972701775781, + null, + null, + 7, + 8, + 7, + 8, + 1, + 2, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 11591880785922286007, + 11861251288579682434, + null, + null, + 9, + 21, + 9, + 21, + 2, + 3, + true, + "Illustration", + "Illustration" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 12159911606058366544, + 6926612245340290214, + null, + null, + 42, + 59, + 42, + 59, + 6, + 8, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 18145815066853361043, + 5215848738409166677, + null, + null, + 78, + 90, + 78, + 90, + 11, + 13, + true, + "rich snippet", + "rich snippet" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 15509823190207649705, + 5960259217804932002, + null, + null, + 97, + 110, + 97, + 110, + 15, + 17, + true, + "AAPG abstract", + "AAPG abstract" + ], + [ + "numval", + "ival", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 17767354399704235152, + 11171804967920230653, + null, + null, + 112, + 113, + 112, + 113, + 18, + 19, + true, + "8", + "8" + ], + [ + "sentence", + "improper", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 3748962472868307378, + 12273141133482550738, + null, + null, + 114, + 220, + 114, + 220, + 19, + 37, + true, + "The language entities here are all related to geological concepts in the domain of oil and gas exploration", + "The language entities here are all related to geological concepts in the domain of oil and gas exploration" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 12159911606058366544, + 6926612245340286517, + null, + null, + 118, + 135, + 118, + 135, + 20, + 22, + true, + "language entities", + "language entities" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 13713515574324394347, + 12590722771456480016, + null, + null, + 160, + 179, + 160, + 179, + 27, + 29, + true, + "geological concepts", + "geological concepts" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 16381206568268873414, + 9023158530069285353, + null, + null, + 187, + 193, + 187, + 193, + 31, + 32, + true, + "domain", + "domain" + ], + [ + "term", + "enum-term-mark-2", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 5515747999597331548, + 15418084629649194710, + null, + null, + 197, + 220, + 197, + 220, + 33, + 37, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 12178341415895623363, + 5058536426346121044, + null, + null, + 197, + 200, + 197, + 200, + 33, + 34, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 5462319091745771382, + "TEXT", + "#/texts/51", + 1.0, + 10692163443301812358, + 705285979073830157, + null, + null, + 205, + 220, + 205, + 220, + 35, + 37, + true, + "gas exploration", + "gas exploration" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/52", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "sentence", + "proper", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 12645284953227579523, + 12974947662755325996, + null, + null, + 0, + 132, + 0, + 132, + 0, + 23, + true, + "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships.", + "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships." + ], + [ + "numval", + "ival", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 17767354399704235161, + 11087830826518420632, + null, + null, + 8, + 9, + 8, + 9, + 1, + 2, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 8106396829827034261, + 10257142782963866660, + null, + null, + 10, + 17, + 10, + 17, + 2, + 3, + true, + "Excerpt", + "Excerpt" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 17270283073797262012, + 9056082944943925130, + null, + null, + 25, + 43, + 25, + 43, + 5, + 7, + true, + "annotated abstract", + "annotated abstract" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 6208047174578296014, + 11157541887347812593, + null, + null, + 52, + 62, + 52, + 62, + 9, + 11, + true, + "AAPG paper", + "AAPG paper" + ], + [ + "numval", + "ival", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 17767354399704235152, + 11087830826319423704, + null, + null, + 63, + 64, + 63, + 64, + 11, + 12, + true, + "8", + "8" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 10146060738548390539, + 10395901779297563099, + null, + null, + 74, + 87, + 74, + 87, + 14, + 16, + true, + "original text", + "original text" + ], + [ + "term", + "enum-term-mark-3", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 13335488353876392384, + 7678075181696158825, + null, + null, + 105, + 131, + 105, + 131, + 19, + 22, + true, + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 14652256560445338257, + 23671674995088716, + null, + null, + 105, + 113, + 105, + 113, + 19, + 20, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 8279380567349713241, + 2766472369779690779, + null, + null, + 118, + 131, + 118, + 131, + 21, + 22, + true, + "relationships", + "relationships" + ], + [ + "sentence", + "proper", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 12234165085298724165, + 11085803697781807661, + null, + null, + 133, + 208, + 133, + 208, + 23, + 38, + true, + "Note that relationships are typed (encoded in the field name) and weighted.", + "Note that relationships are typed (encoded in the field name) and weighted." + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 8279380567349713241, + 2766472369779690668, + null, + null, + 143, + 156, + 143, + 156, + 25, + 26, + true, + "relationships", + "relationships" + ], + [ + "parenthesis", + "round brackets", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 9255725192624412487, + 5908058035068237778, + null, + null, + 167, + 194, + 167, + 194, + 28, + 35, + true, + "(encoded in the field name)", + "(encoded in the field name)" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 5376193215510313776, + 2300794343146009047, + null, + null, + 183, + 193, + 183, + 193, + 32, + 34, + true, + "field name", + "field name" + ], + [ + "sentence", + "proper", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 15149154235125366870, + 971355569745530824, + null, + null, + 209, + 295, + 209, + 295, + 38, + 51, + true, + "The weight reflects the confidence of the language annotation model during extraction.", + "The weight reflects the confidence of the language annotation model during extraction." + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 16381206557786164800, + 9609671604520235392, + null, + null, + 213, + 219, + 213, + 219, + 39, + 40, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 2702871111219879214, + 4793337639583017260, + null, + null, + 233, + 243, + 233, + 243, + 42, + 43, + true, + "confidence", + "confidence" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 11168190807530039563, + 9616121456751772425, + null, + null, + 251, + 276, + 251, + 276, + 45, + 48, + true, + "language annotation model", + "language annotation model" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 5303544497514782120, + 15086525081846414293, + null, + null, + 284, + 294, + 284, + 294, + 49, + 50, + true, + "extraction", + "extraction" + ], + [ + "sentence", + "improper", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 1784572687958230117, + 4252148808147073472, + null, + null, + 296, + 421, + 296, + 421, + 51, + 70, + true, + "Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", + "Relationships are always defined on detected entities, and will therefore use references defining a link between two entities" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 1808270638656316647, + 6568689023336282632, + null, + null, + 296, + 309, + 296, + 309, + 51, + 52, + true, + "Relationships", + "Relationships" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 7548687258225017395, + 17484470566150792413, + null, + null, + 332, + 349, + 332, + 349, + 56, + 58, + true, + "detected entities", + "detected entities" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 15984565858548749625, + 13397156481796417033, + null, + null, + 374, + 384, + 374, + 384, + 63, + 64, + true, + "references", + "references" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 389609625633316251, + 15262543275847637295, + null, + null, + 396, + 400, + 396, + 400, + 66, + 67, + true, + "link", + "link" + ], + [ + "term", + "single-term", + 958124839653591304, + "TEXT", + "#/texts/53", + 1.0, + 14652256560445338257, + 23671674995075921, + null, + null, + 413, + 421, + 413, + 421, + 69, + 70, + true, + "entities", + "entities" + ], + [ + "sentence", + "proper", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 3379820760040661991, + 9630040689393067965, + null, + null, + 0, + 71, + 0, + 71, + 0, + 12, + true, + "From a scaling perspective, this task is again embarrassingly parallel.", + "From a scaling perspective, this task is again embarrassingly parallel." + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 14759509177149592461, + 3924802885839203615, + null, + null, + 7, + 26, + 7, + 26, + 2, + 4, + true, + "scaling perspective", + "scaling perspective" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 389609625631210899, + 12900735516089250680, + null, + null, + 33, + 37, + 33, + 37, + 6, + 7, + true, + "task", + "task" + ], + [ + "sentence", + "proper", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 9719846915474708286, + 1724806033594978345, + null, + null, + 72, + 225, + 72, + 225, + 12, + 39, + true, + "Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity.", + "Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity." + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 5101579281631733460, + 16368993288317499605, + null, + null, + 83, + 98, + 83, + 98, + 14, + 16, + true, + "extraction task", + "extraction task" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 4147688156856812386, + 10246888153284075300, + null, + null, + 104, + 119, + 104, + 119, + 18, + 20, + true, + "annotation task", + "annotation task" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 8165740181202876025, + 2261200330077561310, + null, + null, + 136, + 153, + 136, + 153, + 23, + 26, + true, + "new data entities", + "new data entities" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 14814151107054759097, + 7178764862843143005, + null, + null, + 176, + 184, + 176, + 184, + 30, + 32, + true, + "new data", + "new data" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 4106840074686891911, + 5880300922269562472, + null, + null, + 213, + 224, + 213, + 224, + 36, + 38, + true, + "data entity", + "data entity" + ], + [ + "sentence", + "proper", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 6497043505161373931, + 3986341826774012937, + null, + null, + 226, + 421, + 226, + 421, + 39, + 72, + true, + "We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources.", + "We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources." + ], + [ + "term", + "enum-term-mark-2", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 15819622148818218229, + 9356434918116130285, + null, + null, + 254, + 277, + 254, + 277, + 44, + 47, + true, + "entity and relationship", + "entity and relationship" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 16381206564577775616, + 1763085460560834451, + null, + null, + 254, + 260, + 254, + 260, + 44, + 45, + true, + "entity", + "entity" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 398121334352453215, + 4070873708922197400, + null, + null, + 265, + 288, + 265, + 288, + 46, + 48, + true, + "relationship annotators", + "relationship annotators" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 17524405716142769441, + 4212954867232897467, + null, + null, + 296, + 315, + 296, + 315, + 50, + 52, + true, + "document components", + "document components" + ], + [ + "parenthesis", + "round brackets", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 8218859132330537689, + 15199448891854533062, + null, + null, + 316, + 342, + 316, + 342, + 52, + 60, + true, + "(paragraphs, tables, etc.)", + "(paragraphs, tables, etc.)" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13968965538538956038, + 5978660594977828560, + null, + null, + 317, + 327, + 317, + 327, + 53, + 54, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 16381206513098478539, + 17629065645663027433, + null, + null, + 329, + 335, + 329, + 335, + 55, + 56, + true, + "tables", + "tables" + ], + [ + "expression", + "common", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12178341415895450733, + 14365751987021702484, + null, + null, + 337, + 341, + 337, + 341, + 57, + 59, + true, + "etc", + "etc." + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 14814034872218884114, + 8691748728752973432, + null, + null, + 346, + 354, + 346, + 354, + 61, + 62, + true, + "parallel", + "parallel" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13985988710970420061, + 11652619962770153512, + null, + null, + 375, + 385, + 375, + 385, + 65, + 66, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 11188780108166616166, + 18209882539381064623, + null, + null, + 393, + 420, + 393, + 420, + 68, + 71, + true, + "available compute resources", + "available compute resources" + ], + [ + "sentence", + "proper", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12102171329672562237, + 1970937600463832378, + null, + null, + 422, + 479, + 422, + 479, + 72, + 80, + true, + "Annotation tasks have no internal synchronization points.", + "Annotation tasks have no internal synchronization points." + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 10983505580038966516, + 1311386809729224802, + null, + null, + 422, + 438, + 422, + 438, + 72, + 74, + true, + "Annotation tasks", + "Annotation tasks" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 15507406252536266458, + 8871487447783311304, + null, + null, + 447, + 478, + 447, + 478, + 76, + 79, + true, + "internal synchronization points", + "internal synchronization points" + ], + [ + "sentence", + "proper", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13423572652191948422, + 13417187127495456897, + null, + null, + 480, + 570, + 480, + 570, + 80, + 97, + true, + "From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs.", + "From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs." + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 16381206562408205435, + 7059006330358361907, + null, + null, + 487, + 493, + 487, + 493, + 82, + 83, + true, + "corpus", + "corpus" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12178341415896426714, + 14365907824633173416, + null, + null, + 503, + 506, + 503, + 506, + 85, + 86, + true, + "100", + "100" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12178341415896430891, + 14365907899184508224, + null, + null, + 507, + 510, + 507, + 510, + 86, + 87, + true, + "000", + "000" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 6167933651658664291, + 5381718186587203635, + null, + null, + 511, + 520, + 511, + 520, + 87, + 88, + true, + "documents", + "documents" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 17767354399704235163, + 2072240023181579806, + null, + null, + 549, + 550, + 549, + 550, + 93, + 94, + true, + "3", + "3" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 8014457366322397053, + 205843253777526494, + null, + null, + 551, + 569, + 551, + 569, + 94, + 96, + true, + "million paragraphs", + "million paragraphs" + ], + [ + "sentence", + "proper", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 10872421192555193150, + 7255995964890387843, + null, + null, + 571, + 687, + 571, + 687, + 97, + 114, + true, + "Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", + "Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers." + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 12229168076678998995, + 12510724719610977574, + null, + null, + 580, + 599, + 580, + 599, + 98, + 100, + true, + "unlimited resources", + "unlimited resources" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 4147688156856812386, + 10246888153283929577, + null, + null, + 605, + 620, + 605, + 620, + 102, + 104, + true, + "annotation task", + "annotation task" + ], + [ + "numval", + "ival", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 17767354399704235163, + 2072240023181582239, + null, + null, + 657, + 658, + 657, + 658, + 109, + 110, + true, + "3", + "3" + ], + [ + "term", + "single-term", + 1448405324616602032, + "TEXT", + "#/texts/54", + 1.0, + 13272446480061434936, + 11845447386382530535, + null, + null, + 659, + 686, + 659, + 686, + 110, + 113, + true, + "million independent workers", + "million independent workers" + ], + [ + "sentence", + "improper", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 18150799209915986647, + 9689816716635830050, + null, + null, + 0, + 31, + 0, + 31, + 0, + 9, + true, + "2.1.3 | Aggregation of entities", + "2.1.3 | Aggregation of entities" + ], + [ + "expression", + "wtoken-concatenation", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 329104147711421775, + 6272026767940968313, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "2.1.3", + "2.1.3" + ], + [ + "numval", + "fval", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 12178341415896439119, + 18028276311967117811, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 17767354399704235163, + 11990453707355571146, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "3", + "3" + ], + [ + "term", + "single-term", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 12405860233317252202, + 7842840693633921498, + null, + null, + 6, + 19, + 6, + 19, + 5, + 7, + true, + "| Aggregation", + "| Aggregation" + ], + [ + "term", + "single-term", + 2617775076168299948, + "TEXT", + "#/texts/55", + 1.0, + 14652256560445338257, + 9338367723294437133, + null, + null, + 23, + 31, + 23, + 31, + 8, + 9, + true, + "entities", + "entities" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 11367271216130118432, + 10099806209711852677, + null, + null, + 0, + 165, + 0, + 165, + 0, + 32, + true, + "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in.", + "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9614479359601927568, + 14941729413387470974, + null, + null, + 4, + 20, + 4, + 20, + 1, + 3, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787560493, + null, + null, + 25, + 33, + 25, + 33, + 4, + 5, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5101579281631733460, + 17565787919127588672, + null, + null, + 51, + 66, + 51, + 66, + 9, + 11, + true, + "extraction task", + "extraction task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104161787480235, + 14057633201980204607, + null, + null, + 75, + 80, + 75, + 80, + 14, + 15, + true, + "sense", + "sense" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15983101815597714776, + 70459589720017255, + null, + null, + 96, + 108, + 96, + 108, + 18, + 20, + true, + "new entities", + "new entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206579112188113, + 11083441268366703806, + null, + null, + 135, + 141, + 135, + 141, + 26, + 27, + true, + "source", + "source" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 13872144822242467415, + 12063372182631901768, + null, + null, + 166, + 296, + 166, + 295, + 32, + 53, + true, + "In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction.", + "In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14650447861280948245, + 2874163810587468663, + null, + null, + 169, + 177, + 169, + 177, + 33, + 34, + true, + "addition", + "addition" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5303544497514782120, + 1457149129547678585, + null, + null, + 181, + 191, + 181, + 191, + 35, + 36, + true, + "extraction", + "extraction" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 6746037511376410145, + 11728944427356384927, + null, + null, + 197, + 220, + 197, + 220, + 38, + 41, + true, + "entity aggregation task", + "entity aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14708728462097652999, + 15263769781535282609, + null, + null, + 236, + 256, + 236, + 255, + 44, + 47, + true, + "similarity metric \u00b6", + "similarity metric \u00b6" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787578846, + null, + null, + 269, + 277, + 268, + 276, + 49, + 50, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5303544497514782120, + 1457149129547684243, + null, + null, + 285, + 295, + 284, + 294, + 51, + 52, + true, + "extraction", + "extraction" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5037884732683442691, + 10436193456753512017, + null, + null, + 297, + 477, + 296, + 476, + 53, + 86, + true, + "This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated.", + "This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14087387480976987019, + 17412828544627723541, + null, + null, + 302, + 312, + 301, + 311, + 54, + 55, + true, + "similarity", + "similarity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787574259, + null, + null, + 339, + 347, + 338, + 346, + 60, + 61, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 12525375003379905483, + 6952514242140296043, + null, + null, + 361, + 382, + 360, + 381, + 64, + 67, + true, + "same language concept", + "same language concept" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 7459064322160295058, + 16552287479938986451, + null, + null, + 420, + 433, + 419, + 432, + 75, + 77, + true, + "single entity", + "single entity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15441160910541480204, + 12103870215605198083, + null, + null, + 441, + 443, + 440, + 442, + 79, + 80, + true, + "KG", + "KG" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14134432982956085165, + 12558270821582062646, + null, + null, + 478, + 600, + 477, + 599, + 86, + 107, + true, + "In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components.", + "In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206514091025767, + 5867212895741027859, + null, + null, + 481, + 487, + 480, + 486, + 87, + 88, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17767354399704235161, + 6534238515883477149, + null, + null, + 488, + 489, + 487, + 488, + 88, + 89, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9614479359601927568, + 14941729413387177248, + null, + null, + 515, + 531, + 514, + 530, + 94, + 96, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104159243796903, + 5660915257099444274, + null, + null, + 540, + 545, + 539, + 544, + 98, + 99, + true, + "types", + "types" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031787526504, + null, + null, + 549, + 557, + 548, + 556, + 100, + 101, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 12805042060785076198, + 13666698149907793309, + null, + null, + 565, + 599, + 564, + 598, + 102, + 106, + true, + "many different document components", + "many different document components" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14785096463026995035, + 7734917022209301213, + null, + null, + 601, + 711, + 600, + 710, + 107, + 124, + true, + "These entity types could be for example materials and properties or geological formations and geological ages.", + "These entity types could be for example materials and properties or geological formations and geological ages." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 7246756799784975282, + 3536616615701721498, + null, + null, + 607, + 619, + 606, + 618, + 108, + 110, + true, + "entity types", + "entity types" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8873555710417801716, + 11367606112035812080, + null, + null, + 633, + 650, + 632, + 649, + 113, + 115, + true, + "example materials", + "example materials" + ], + [ + "term", + "enum-term-mark-3", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15083712120508435047, + 2420676786096945602, + null, + null, + 641, + 665, + 640, + 664, + 114, + 117, + true, + "materials and properties", + "materials and properties" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14088628410271132453, + 2902683368439961232, + null, + null, + 655, + 665, + 654, + 664, + 116, + 117, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9648537698556423826, + 5670564797859801155, + null, + null, + 669, + 690, + 668, + 689, + 118, + 120, + true, + "geological formations", + "geological formations" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9663226904190425014, + 7389081649832461915, + null, + null, + 695, + 710, + 694, + 709, + 121, + 123, + true, + "geological ages", + "geological ages" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 2993305092365996286, + 17194587297594850406, + null, + null, + 712, + 912, + 711, + 911, + 124, + 162, + true, + "The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times.", + "The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104161597470987, + 14056796539026990417, + null, + null, + 716, + 721, + 715, + 720, + 125, + 126, + true, + "links", + "links" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15983101815597714776, + 70459589720141381, + null, + null, + 737, + 749, + 736, + 748, + 128, + 130, + true, + "new entities", + "new entities" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17621545813270270871, + 12868479750299862546, + null, + null, + 759, + 772, + 758, + 771, + 132, + 134, + true, + "source entity", + "source entity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 6184772648035902755, + 13644276956832070572, + null, + null, + 803, + 812, + 802, + 811, + 139, + 140, + true, + "frequency", + "frequency" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 329104161505834046, + 14058488092625120224, + null, + null, + 820, + 825, + 819, + 824, + 142, + 143, + true, + "match", + "match" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206557786164800, + 13942859270411079637, + null, + null, + 852, + 858, + 851, + 857, + 151, + 152, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 11068190489576906314, + 3485226378154122039, + null, + null, + 866, + 881, + 865, + 880, + 154, + 156, + true, + "language entity", + "language entity" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17200993877087579427, + 12244708574954642458, + null, + null, + 897, + 911, + 896, + 910, + 159, + 161, + true, + "multiple times", + "multiple times" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8311220471757940334, + 2005274358531069313, + null, + null, + 913, + 999, + 912, + 998, + 162, + 177, + true, + "From an implementation point of view, the aggregation task for entities is nontrivial.", + "From an implementation point of view, the aggregation task for entities is nontrivial." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 469538851381879616, + 14202402575145459277, + null, + null, + 921, + 941, + 920, + 940, + 164, + 166, + true, + "implementation point", + "implementation point" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 389609625619349298, + 2424396184693639955, + null, + null, + 945, + 949, + 944, + 948, + 167, + 168, + true, + "view", + "view" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9614479359601927568, + 14941729413387159766, + null, + null, + 955, + 971, + 954, + 970, + 170, + 172, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14652256560445338257, + 5402311031783443666, + null, + null, + 976, + 984, + 975, + 983, + 173, + 174, + true, + "entities", + "entities" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15423286190643719046, + 11690064648926497115, + null, + null, + 1000, + 1066, + 999, + 1065, + 177, + 188, + true, + "In distributed computing, it corresponds to a reduction operation.", + "In distributed computing, it corresponds to a reduction operation." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 5947879507992892292, + 10649999492811896566, + null, + null, + 1015, + 1024, + 1014, + 1023, + 179, + 180, + true, + "computing", + "computing" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9283617342877675041, + 15873184834718781399, + null, + null, + 1046, + 1065, + 1045, + 1064, + 185, + 187, + true, + "reduction operation", + "reduction operation" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16888006153925329926, + 2859445480279325516, + null, + null, + 1067, + 1179, + 1066, + 1178, + 188, + 203, + true, + "Our implementation distributes the iteration of the source elements among all available computational resources.", + "Our implementation distributes the iteration of the source elements among all available computational resources." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16770038681622514616, + 4247418780695300556, + null, + null, + 1071, + 1085, + 1070, + 1084, + 189, + 190, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 6182474587515941250, + 2243782707457437586, + null, + null, + 1102, + 1111, + 1101, + 1110, + 192, + 193, + true, + "iteration", + "iteration" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 15765378636786371769, + 15305975287346165990, + null, + null, + 1119, + 1134, + 1118, + 1133, + 195, + 197, + true, + "source elements", + "source elements" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14336868642439968330, + 10406918958800501535, + null, + null, + 1145, + 1178, + 1144, + 1177, + 199, + 202, + true, + "available computational resources", + "available computational resources" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 10754148152517303717, + 16101411456265296552, + null, + null, + 1180, + 1323, + 1179, + 1322, + 203, + 229, + true, + "The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size.", + "The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 844664518895955636, + 7006404085999929209, + null, + null, + 1184, + 1195, + 1183, + 1194, + 204, + 205, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 4976485978415387103, + 17001722156673714678, + null, + null, + 1220, + 1232, + 1219, + 1231, + 210, + 212, + true, + "local buffer", + "local buffer" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8498518363315513669, + 5745264438629505035, + null, + null, + 1270, + 1286, + 1269, + 1285, + 219, + 221, + true, + "backend database", + "backend database" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 11218037041441406912, + 11691128466607537398, + null, + null, + 1310, + 1322, + 1309, + 1321, + 226, + 228, + true, + "maximum size", + "maximum size" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 14105785377218888565, + 4351972894165463037, + null, + null, + 1324, + 1429, + 1323, + 1428, + 229, + 249, + true, + "The synchronization step is a simple atomic update into an existing (or a newly created) database object.", + "The synchronization step is a simple atomic update into an existing (or a newly created) database object." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9096796183603350034, + 9964838906647012705, + null, + null, + 1328, + 1348, + 1327, + 1347, + 230, + 232, + true, + "synchronization step", + "synchronization step" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 10270216220990789707, + 15932172102865871238, + null, + null, + 1354, + 1374, + 1353, + 1373, + 234, + 237, + true, + "simple atomic update", + "simple atomic update" + ], + [ + "parenthesis", + "round brackets", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 13741485124789240495, + 7389280086660259889, + null, + null, + 1392, + 1412, + 1391, + 1411, + 240, + 246, + true, + "(or a newly created)", + "(or a newly created)" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 13739718539243203510, + 2110945273060730382, + null, + null, + 1413, + 1428, + 1412, + 1427, + 246, + 248, + true, + "database object", + "database object" + ], + [ + "sentence", + "proper", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 12813104783910172537, + 2053442892096489490, + null, + null, + 1430, + 1517, + 1429, + 1516, + 249, + 264, + true, + "The synchronization for updates from each worker task does not collide with the others.", + "The synchronization for updates from each worker task does not collide with the others." + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 17812108117212345078, + 17863514596000506382, + null, + null, + 1434, + 1449, + 1433, + 1448, + 250, + 251, + true, + "synchronization", + "synchronization" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 8106478011916155592, + 11450752277701638419, + null, + null, + 1454, + 1461, + 1453, + 1460, + 252, + 253, + true, + "updates", + "updates" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 9601975787072124180, + 17970814035593313155, + null, + null, + 1472, + 1483, + 1471, + 1482, + 255, + 257, + true, + "worker task", + "worker task" + ], + [ + "term", + "single-term", + 13974986056043304735, + "TEXT", + "#/texts/56", + 1.0, + 16381206566198176359, + 13844350319778229358, + null, + null, + 1510, + 1516, + 1509, + 1515, + 262, + 263, + true, + "others", + "others" + ], + [ + "sentence", + "improper", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 12765605759878485615, + 12015650457911020971, + null, + null, + 0, + 36, + 0, + 36, + 0, + 9, + true, + "2.1.4 | Aggregation of relationships", + "2.1.4 | Aggregation of relationships" + ], + [ + "expression", + "wtoken-concatenation", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 329104147711421772, + 3440265738096889757, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "2.1.4", + "2.1.4" + ], + [ + "numval", + "fval", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 12178341415896439119, + 9356899144609064731, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "2.1", + "2.1" + ], + [ + "numval", + "ival", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 17767354399704235156, + 5166044511235843509, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 12405860233317252202, + 3087312697670277359, + null, + null, + 6, + 19, + 6, + 19, + 5, + 7, + true, + "| Aggregation", + "| Aggregation" + ], + [ + "term", + "single-term", + 5985285694705576020, + "TEXT", + "#/texts/57", + 1.0, + 8279380567349713241, + 13167358372837789646, + null, + null, + 23, + 36, + 23, + 36, + 8, + 9, + true, + "relationships", + "relationships" + ], + [ + "sentence", + "proper", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 5318507464051547992, + 15229729095805195430, + null, + null, + 0, + 134, + 0, + 134, + 0, + 19, + true, + "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation.", + "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation." + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 844664518895955636, + 4707743503550014047, + null, + null, + 4, + 15, + 4, + 15, + 1, + 2, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 8279380567349713241, + 4993685421577464738, + null, + null, + 19, + 32, + 19, + 32, + 3, + 4, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 6172031743812195918, + 8603792663584797402, + null, + null, + 44, + 53, + 44, + 53, + 5, + 7, + true, + "new links", + "new links" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 14652256560445338257, + 12349417693150165423, + null, + null, + 66, + 74, + 66, + 74, + 9, + 10, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17436612889402329741, + 257686206738688629, + null, + null, + 103, + 133, + 103, + 133, + 15, + 18, + true, + "previous aggregation operation", + "previous aggregation operation" + ], + [ + "sentence", + "proper", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 7339777553356328117, + 17064922654164515372, + null, + null, + 135, + 270, + 135, + 270, + 19, + 43, + true, + "In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together.", + "In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together." + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 16381206514091025767, + 1041958781247576177, + null, + null, + 138, + 144, + 138, + 144, + 20, + 21, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17767354399704235161, + 6700456192654799825, + null, + null, + 145, + 146, + 145, + 146, + 21, + 22, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 389609625631210899, + 4226532899380792628, + null, + null, + 153, + 157, + 153, + 157, + 24, + 25, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 4041064346196287786, + 12193344528431002782, + null, + null, + 177, + 191, + 177, + 191, + 29, + 31, + true, + "last operation", + "last operation" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 14652256560445338257, + 12349417693150135191, + null, + null, + 199, + 207, + 199, + 207, + 33, + 34, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17634022196856315426, + 12810331327669295159, + null, + null, + 216, + 238, + 216, + 238, + 36, + 38, + true, + "annotated relationship", + "annotated relationship" + ], + [ + "sentence", + "proper", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 16272411550008296662, + 11555295498174731192, + null, + null, + 271, + 384, + 271, + 384, + 43, + 67, + true, + "For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", + "For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1." + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 8106397496085150773, + 13532347169213908612, + null, + null, + 275, + 282, + 275, + 282, + 44, + 45, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 389609625699630670, + 4202499610317382249, + null, + null, + 297, + 301, + 297, + 301, + 49, + 50, + true, + "edge", + "edge" + ], + [ + "name", + "name-concatenation", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 2223855877560943312, + 1919053918596705356, + null, + null, + 314, + 328, + 314, + 328, + 52, + 55, + true, + "Egret-Hibernia", + "Egret-Hibernia" + ], + [ + "term", + "enum-term-mark-4", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 14509765494259181124, + 6229488919186620168, + null, + null, + 320, + 358, + 320, + 358, + 54, + 60, + true, + "Hibernia Petroleum System and Jeanne D", + "Hibernia Petroleum System and Jeanne D" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 8984547291290070810, + 10785884644016224053, + null, + null, + 320, + 345, + 320, + 345, + 54, + 57, + true, + "Hibernia Petroleum System", + "Hibernia Petroleum System" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 14652280738006232506, + 7568070556752739322, + null, + null, + 350, + 358, + 350, + 358, + 58, + 60, + true, + "Jeanne D", + "Jeanne D" + ], + [ + "expression", + "wtoken-concatenation", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 329104147807359846, + 8954447317539411217, + null, + null, + 357, + 362, + 357, + 362, + 59, + 62, + true, + "D'Arc", + "D'Arc" + ], + [ + "term", + "single-term", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 6561252587229194230, + 15912761612523134469, + null, + null, + 359, + 368, + 359, + 368, + 61, + 63, + true, + "Arc Basin", + "Arc Basin" + ], + [ + "numval", + "ival", + 11235296141350659290, + "TEXT", + "#/texts/58", + 1.0, + 17767354399704235161, + 6700456192654780632, + null, + null, + 382, + 383, + 382, + 383, + 65, + 66, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/59", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 4361549266576336732, + "TEXT", + "#/texts/60", + 1.0, + 17767354399704235158, + 5655206626033153623, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "6", + "6" + ], + [ + "sentence", + "improper", + 4361549266576336732, + "TEXT", + "#/texts/60", + 1.0, + 15441160910541485670, + 15406507517050657965, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], + [ + "numval", + "ival", + 4361549266576336732, + "TEXT", + "#/texts/60", + 1.0, + 15441160910541481979, + 15406507443958837158, + null, + null, + 3, + 5, + 3, + 5, + 2, + 3, + true, + "15", + "15" + ], + [ + "sentence", + "proper", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 1989015336043033185, + 17681587218053733993, + null, + null, + 0, + 104, + 0, + 104, + 0, + 17, + true, + "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation.", + "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation." + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 844664518895955636, + 6942483173811422672, + null, + null, + 15, + 26, + 15, + 26, + 3, + 4, + true, + "aggregation", + "aggregation" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14652256560445338257, + 16076905631701172695, + null, + null, + 30, + 38, + 30, + 38, + 5, + 6, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 9614479359601927568, + 11015701264450843984, + null, + null, + 44, + 60, + 44, + 60, + 8, + 10, + true, + "aggregation task", + "aggregation task" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8279380567349713241, + 4443872868806033431, + null, + null, + 65, + 78, + 65, + 78, + 11, + 12, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 9283617342877675041, + 1437171259999206723, + null, + null, + 84, + 103, + 84, + 103, + 14, + 16, + true, + "reduction operation", + "reduction operation" + ], + [ + "sentence", + "proper", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 7530777968862609021, + 1101347999983368416, + null, + null, + 105, + 199, + 105, + 199, + 17, + 30, + true, + "Two independent document components could describe the same relationship between two entities.", + "Two independent document components could describe the same relationship between two entities." + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 1050457497437719405, + 14594207665656202151, + null, + null, + 105, + 140, + 105, + 140, + 17, + 21, + true, + "Two independent document components", + "Two independent document components" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 11267724614336150621, + 8843317855331358448, + null, + null, + 160, + 177, + 160, + 177, + 24, + 26, + true, + "same relationship", + "same relationship" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14652256560445338257, + 16076905631701100415, + null, + null, + 190, + 198, + 190, + 198, + 28, + 29, + true, + "entities", + "entities" + ], + [ + "sentence", + "proper", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 18319377738763554945, + 17590146835780286845, + null, + null, + 200, + 405, + 200, + 405, + 30, + 65, + true, + "To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached.", + "To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached." + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 11690899603853993269, + 1940567799173253407, + null, + null, + 216, + 248, + 216, + 248, + 33, + 36, + true, + "synchronization lookup operation", + "synchronization lookup operation" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8498518363315513669, + 11874681077770548005, + null, + null, + 258, + 274, + 258, + 274, + 38, + 40, + true, + "backend database", + "backend database" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 389609625631210899, + 6515706229842801309, + null, + null, + 281, + 285, + 281, + 285, + 42, + 43, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 4976485978415387103, + 13157458319015694846, + null, + null, + 302, + 314, + 302, + 314, + 46, + 48, + true, + "local buffer", + "local buffer" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8106396967856974361, + 16862662699097952271, + null, + null, + 337, + 344, + 337, + 344, + 51, + 52, + true, + "changes", + "changes" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 15441160910541480204, + 7076021896677418682, + null, + null, + 368, + 370, + 368, + 370, + 57, + 58, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 11218037041441406912, + 710943453708216350, + null, + null, + 381, + 393, + 381, + 393, + 60, + 62, + true, + "maximum size", + "maximum size" + ], + [ + "sentence", + "proper", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 13293192132043146016, + 5012103471707666411, + null, + null, + 406, + 564, + 406, + 564, + 65, + 89, + true, + "This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", + "This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database." + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14650448032998792781, + 18358605601302965052, + null, + null, + 411, + 419, + 411, + 419, + 66, + 67, + true, + "approach", + "approach" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 14747625504171261759, + 12271672629498529699, + null, + null, + 445, + 456, + 445, + 456, + 71, + 72, + true, + "computation", + "computation" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 13327256571445639908, + 12955231630551139133, + null, + null, + 471, + 497, + 471, + 497, + 75, + 78, + true, + "source document components", + "source document components" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 13985988710970420061, + 7242007196001838848, + null, + null, + 529, + 539, + 529, + 539, + 83, + 84, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 5771309285006424458, + "TEXT", + "#/texts/61", + 1.0, + 8498518363315513669, + 11874681077770959213, + null, + null, + 547, + 563, + 547, + 563, + 86, + 88, + true, + "backend database", + "backend database" + ], + [ + "numval", + "fval", + 5371685212527510397, + "TEXT", + "#/texts/62", + 1.0, + 12178341415896439118, + 9239884836110286517, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "2.2", + "2.2" + ], + [ + "sentence", + "improper", + 5371685212527510397, + "TEXT", + "#/texts/62", + 1.0, + 6804373445441261094, + 2924256996521789407, + null, + null, + 4, + 16, + 4, + 16, + 3, + 6, + true, + "| Data flows", + "| Data flows" + ], + [ + "term", + "single-term", + 5371685212527510397, + "TEXT", + "#/texts/62", + 1.0, + 389609625537659398, + 16585943581147490691, + null, + null, + 6, + 10, + 6, + 10, + 4, + 5, + true, + "Data", + "Data" + ], + [ + "sentence", + "proper", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 5027814746067812274, + 8880743583508752189, + null, + null, + 0, + 144, + 0, + 144, + 0, + 29, + true, + "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG.", + "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG." + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 8106479265948440982, + 781741948405146011, + null, + null, + 4, + 11, + 4, + 11, + 1, + 2, + true, + "purpose", + "purpose" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 15441160910541480770, + 14767814329685856037, + null, + null, + 17, + 19, + 17, + 19, + 4, + 5, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 513252523484387603, + 17258289923281663294, + null, + null, + 37, + 51, + 37, + 51, + 9, + 11, + true, + "execution plan", + "execution plan" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 4681591099656035072, + 3420565556676131642, + null, + null, + 60, + 70, + 60, + 70, + 13, + 15, + true, + "task types", + "task types" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 15530988091855779508, + 13374334168429685199, + null, + null, + 91, + 107, + 91, + 107, + 19, + 21, + true, + "meaningful order", + "meaningful order" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 541002758701937407, + 10425137461922241957, + null, + null, + 132, + 143, + 132, + 143, + 26, + 28, + true, + "specific KG", + "specific KG" + ], + [ + "sentence", + "improper", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 5885770492001511082, + 11316457715017340686, + null, + null, + 145, + 225, + 145, + 225, + 29, + 45, + true, + "When instantiating a DF, one has the possibility to define in a declarative way:", + "When instantiating a DF, one has the possibility to define in a declarative way:" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 15441160910541480770, + 14767814329685844554, + null, + null, + 166, + 168, + 166, + 168, + 32, + 33, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 17078598475728807446, + 1685386194017502435, + null, + null, + 182, + 193, + 182, + 193, + 37, + 38, + true, + "possibility", + "possibility" + ], + [ + "term", + "single-term", + 7817257645383866853, + "TEXT", + "#/texts/63", + 1.0, + 2892304827914802359, + 2785411801236762324, + null, + null, + 209, + 224, + 209, + 224, + 42, + 44, + true, + "declarative way", + "declarative way" + ], + [ + "numval", + "ival", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17767354399704235161, + 9308892477550455324, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "sentence", + "improper", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17767354399704235166, + 9308892478371477431, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17952852338970756919, + 13111419393486766397, + null, + null, + 3, + 179, + 3, + 179, + 2, + 34, + true, + "Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", + "Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 17524405716142769441, + 17590820034902442787, + null, + null, + 9, + 28, + 9, + 28, + 3, + 5, + true, + "document components", + "document components" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 12990634353973901002, + 15215534364197625311, + null, + null, + 56, + 72, + 56, + 72, + 10, + 12, + true, + "converted corpus", + "converted corpus" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 15765380208127739160, + 3487088127591388980, + null, + null, + 81, + 96, + 81, + 96, + 14, + 16, + true, + "source entities", + "source entities" + ], + [ + "parenthesis", + "round brackets", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 6687613673164405501, + 3718808189748185983, + null, + null, + 97, + 178, + 97, + 178, + 16, + 33, + true, + "(eg, extract all paragraphs, tables, figures and captions from the AAPG articles)", + "(eg, extract all paragraphs, tables, figures and captions from the AAPG articles)" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 15441160910541487324, + 14099211802745898044, + null, + null, + 98, + 100, + 98, + 100, + 17, + 18, + true, + "eg", + "eg" + ], + [ + "term", + "enum-term-mark-3", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 13927242563355017790, + 11111361558765550691, + null, + null, + 114, + 154, + 114, + 154, + 21, + 28, + true, + "paragraphs, tables, figures and captions", + "paragraphs, tables, figures and captions" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 13968965538538956038, + 15874836512031319554, + null, + null, + 114, + 124, + 114, + 124, + 21, + 22, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 16381206513098478539, + 16007955502257386472, + null, + null, + 126, + 132, + 126, + 132, + 23, + 24, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 8106397480533647371, + 5407999228276195896, + null, + null, + 134, + 141, + 134, + 141, + 25, + 26, + true, + "figures", + "figures" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 14652289689770638970, + 3082690678157308456, + null, + null, + 146, + 154, + 146, + 154, + 27, + 28, + true, + "captions", + "captions" + ], + [ + "term", + "single-term", + 2929626768872004841, + "TEXT", + "#/texts/64", + 1.0, + 15509825031107342057, + 16430485838307002175, + null, + null, + 164, + 177, + 164, + 177, + 30, + 32, + true, + "AAPG articles", + "AAPG articles" + ], + [ + "numval", + "ival", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 17767354399704235162, + 8832343908208005813, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "sentence", + "improper", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 17767354399704235166, + 8832343915883415512, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 5555439802994169065, + 12582050013497350866, + null, + null, + 3, + 133, + 3, + 133, + 2, + 30, + true, + "Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", + "Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 4278121145758736886, + 7548965380005005828, + null, + null, + 9, + 24, + 9, + 24, + 3, + 5, + true, + "annotator model", + "annotator model" + ], + [ + "expression", + "wtoken-concatenation", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 14638289750758744304, + 5209943650119548686, + null, + null, + 19, + 27, + 19, + 27, + 4, + 8, + true, + "model(s)", + "model(s)" + ], + [ + "parenthesis", + "round brackets", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 12178341415896391104, + 1916219311504389810, + null, + null, + 24, + 27, + 24, + 27, + 5, + 8, + true, + "(s)", + "(s)" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 389609625631434316, + 1689548585610910345, + null, + null, + 44, + 48, + 44, + 48, + 12, + 13, + true, + "type", + "type" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 17621545813270270871, + 9209277177813395468, + null, + null, + 52, + 65, + 52, + 65, + 14, + 16, + true, + "source entity", + "source entity" + ], + [ + "parenthesis", + "round brackets", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 14471732333604091421, + 18137583642465182298, + null, + null, + 66, + 132, + 66, + 132, + 16, + 29, + true, + "(eg, run the geology or material science annotators on paragraphs)", + "(eg, run the geology or material science annotators on paragraphs)" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 15441160910541487324, + 11728379687465099158, + null, + null, + 67, + 69, + 67, + 69, + 17, + 18, + true, + "eg", + "eg" + ], + [ + "term", + "enum-term-mark-2", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 8092669759698512989, + 2405114237402716424, + null, + null, + 79, + 106, + 79, + 106, + 21, + 25, + true, + "geology or material science", + "geology or material science" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 8106396492330410986, + 14716777031470552583, + null, + null, + 79, + 86, + 79, + 86, + 21, + 22, + true, + "geology", + "geology" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 8077232144599436920, + 3097313284964407388, + null, + null, + 90, + 117, + 90, + 117, + 23, + 26, + true, + "material science annotators", + "material science annotators" + ], + [ + "term", + "single-term", + 15879756297712818143, + "TEXT", + "#/texts/65", + 1.0, + 13968965538538956038, + 9693440826264227420, + null, + null, + 121, + 131, + 121, + 131, + 27, + 28, + true, + "paragraphs", + "paragraphs" + ], + [ + "numval", + "ival", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 17767354399704235163, + 4307298561096377444, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "3", + "3" + ], + [ + "sentence", + "improper", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 17767354399704235166, + 4307298561299144891, + null, + null, + 1, + 2, + 1, + 2, + 1, + 2, + true, + ".", + "." + ], + [ + "sentence", + "proper", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 5536330998509339910, + 8869846018318508353, + null, + null, + 3, + 101, + 3, + 101, + 2, + 17, + true, + "Which entity and relationship aggregations to perform on which set of annotated language entities?", + "Which entity and relationship aggregations to perform on which set of annotated language entities?" + ], + [ + "term", + "enum-term-mark-2", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 15819622148818218229, + 3247544755530782749, + null, + null, + 9, + 32, + 9, + 32, + 3, + 6, + true, + "entity and relationship", + "entity and relationship" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 16381206564577775616, + 17802925239924973922, + null, + null, + 9, + 15, + 9, + 15, + 3, + 4, + true, + "entity", + "entity" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 10375574698259277266, + 6831591855006875406, + null, + null, + 20, + 45, + 20, + 45, + 5, + 7, + true, + "relationship aggregations", + "relationship aggregations" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 12178341415895638602, + 11898208455583787541, + null, + null, + 66, + 69, + 66, + 69, + 11, + 12, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 16116531546352845311, + "TEXT", + "#/texts/66", + 1.0, + 13768846528430928163, + 13590159867312231253, + null, + null, + 73, + 100, + 73, + 100, + 13, + 16, + true, + "annotated language entities", + "annotated language entities" + ], + [ + "sentence", + "proper", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 17890377425977316094, + 4134511052230682896, + null, + null, + 0, + 95, + 0, + 95, + 0, + 18, + true, + "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology.", + "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology." + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896110548, + 16353810209333011444, + null, + null, + 4, + 7, + 4, + 7, + 1, + 2, + true, + "DFs", + "DFs" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 7073262388879647009, + 1185102068772771476, + null, + null, + 28, + 38, + 28, + 38, + 7, + 8, + true, + "blueprints", + "blueprints" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206562408205435, + 5202994988327969889, + null, + null, + 58, + 64, + 58, + 64, + 11, + 12, + true, + "corpus", + "corpus" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 15908928394544452409, + 3315967902757812332, + null, + null, + 80, + 94, + 80, + 94, + 15, + 17, + true, + "graph topology", + "graph topology" + ], + [ + "sentence", + "proper", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 3849602608152927799, + 5933795491641734356, + null, + null, + 96, + 385, + 96, + 385, + 18, + 68, + true, + "Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases.", + "Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases." + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16770038681622514616, + 8650515165933909157, + null, + null, + 109, + 123, + 109, + 123, + 21, + 22, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896110548, + 16353810209333018399, + null, + null, + 127, + 130, + 127, + 130, + 23, + 24, + true, + "DFs", + "DFs" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 329104159214088329, + 3704404855780482013, + null, + null, + 141, + 146, + 141, + 146, + 26, + 27, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 1588332591737418271, + 11333604657040713834, + null, + null, + 159, + 170, + 159, + 170, + 29, + 30, + true, + "flexibility", + "flexibility" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 11461340646147400027, + 5645701091668897811, + null, + null, + 194, + 210, + 194, + 210, + 34, + 36, + true, + "source documents", + "source documents" + ], + [ + "expression", + "word-concatenation", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 8411957399126827159, + 17553608664055113416, + null, + null, + 216, + 226, + 216, + 226, + 38, + 41, + true, + "well-known", + "well-known" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 389609625633611873, + 84147999315830457, + null, + null, + 216, + 220, + 216, + 220, + 38, + 39, + true, + "well", + "well" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 4106840258168008230, + 15428440868245102737, + null, + null, + 227, + 238, + 227, + 238, + 41, + 43, + true, + "data schema", + "data schema" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896221596, + 16353806306611832484, + null, + null, + 252, + 255, + 252, + 255, + 46, + 47, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 6168083952332832164, + 11352255425650886421, + null, + null, + 275, + 284, + 275, + 284, + 51, + 52, + true, + "structure", + "structure" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 2385031725262916889, + 12425871286607371512, + null, + null, + 314, + 333, + 314, + 333, + 58, + 60, + true, + "JSON representation", + "JSON representation" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 5594093096302267983, + 5645763396809619566, + null, + null, + 345, + 358, + 345, + 358, + 62, + 64, + true, + "data entities", + "data entities" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 5384513680513712549, + 1613404999844258732, + null, + null, + 364, + 384, + 364, + 384, + 65, + 67, + true, + "precurated databases", + "precurated databases" + ], + [ + "sentence", + "proper", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 6233663802035603519, + 12646838603453794377, + null, + null, + 386, + 525, + 386, + 525, + 68, + 93, + true, + "We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", + "We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch." + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12779036928191531604, + 17810175013447901093, + null, + null, + 402, + 414, + 402, + 414, + 71, + 73, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "enum-term-mark-2", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 11121084906404368828, + 14065085376394746955, + null, + null, + 426, + 443, + 426, + 443, + 75, + 78, + true, + "export and import", + "export and import" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206569048371007, + 12545384013888449433, + null, + null, + 426, + 432, + 426, + 432, + 75, + 76, + true, + "export", + "export" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206560633506211, + 6020333096561425742, + null, + null, + 437, + 443, + 437, + 443, + 77, + 78, + true, + "import", + "import" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 12178341415896110548, + 16353810209332997583, + null, + null, + 447, + 450, + 447, + 450, + 79, + 80, + true, + "DFs", + "DFs" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 15983059512171872769, + 12420728277455273707, + null, + null, + 463, + 475, + 463, + 475, + 82, + 84, + true, + "new datasets", + "new datasets" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 16381206569552972313, + 5307296425469714515, + null, + null, + 488, + 494, + 488, + 494, + 86, + 87, + true, + "burden", + "burden" + ], + [ + "term", + "single-term", + 9541434157786316356, + "TEXT", + "#/texts/67", + 1.0, + 8106475349459363877, + 15665911979141161163, + null, + null, + 517, + 524, + 517, + 524, + 91, + 92, + true, + "scratch", + "scratch" + ], + [ + "sentence", + "proper", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 14390776963480530757, + 14144391792905836053, + null, + null, + 0, + 230, + 0, + 230, + 0, + 39, + true, + "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel.", + "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel." + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15829965978251528098, + 1372718158532474013, + null, + null, + 4, + 18, + 4, + 18, + 1, + 3, + true, + "backend engine", + "backend engine" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 12178341415896112046, + 7013214852574030934, + null, + null, + 35, + 38, + 35, + 38, + 6, + 7, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15441160910541480770, + 448741175652348348, + null, + null, + 59, + 61, + 59, + 61, + 10, + 11, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4465888657594319459, + 9689611985399197926, + null, + null, + 90, + 106, + 90, + 106, + 15, + 17, + true, + "individual tasks", + "individual tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4421383392096991748, + 16000306739803699679, + null, + null, + 114, + 131, + 114, + 131, + 19, + 21, + true, + "compute resources", + "compute resources" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 13920350439839821083, + 16955722751473761512, + null, + null, + 141, + 161, + 141, + 161, + 23, + 25, + true, + "independent branches", + "independent branches" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 12178341415896112046, + 7013214852574069656, + null, + null, + 169, + 172, + 169, + 172, + 27, + 28, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 329104161556625920, + 10005480021798899834, + null, + null, + 191, + 196, + 191, + 196, + 31, + 32, + true, + "chain", + "chain" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 329104159214088329, + 9882082029920318768, + null, + null, + 200, + 205, + 200, + 205, + 33, + 34, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 14814034872218884114, + 4851277065202120364, + null, + null, + 221, + 229, + 221, + 229, + 37, + 38, + true, + "parallel", + "parallel" + ], + [ + "sentence", + "proper", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 10530703581906041107, + 10566686277400129697, + null, + null, + 231, + 300, + 231, + 300, + 39, + 49, + true, + "The achievable level of parallelism changes throughout the execution.", + "The achievable level of parallelism changes throughout the execution." + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 10252716422187396036, + 11724540237306114869, + null, + null, + 235, + 251, + 235, + 251, + 40, + 42, + true, + "achievable level", + "achievable level" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4840493146633456446, + 4990244669328773678, + null, + null, + 255, + 274, + 255, + 274, + 43, + 45, + true, + "parallelism changes", + "parallelism changes" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 6168355606348623882, + 6552081320825944349, + null, + null, + 290, + 299, + 290, + 299, + 47, + 48, + true, + "execution", + "execution" + ], + [ + "sentence", + "proper", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15328322475466549779, + 6356331500693372898, + null, + null, + 301, + 459, + 301, + 459, + 49, + 76, + true, + "A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities.", + "A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities." + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 16494665832924434534, + 11964331841030860490, + null, + null, + 303, + 320, + 303, + 320, + 50, + 52, + true, + "practical example", + "practical example" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15441160910541480770, + 448741175652334105, + null, + null, + 326, + 328, + 326, + 328, + 54, + 55, + true, + "DF", + "DF" + ], + [ + "term", + "enum-term-mark-3", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 857608957643671729, + 8789227088448338439, + null, + null, + 344, + 368, + 344, + 368, + 57, + 60, + true, + "paragraphs and abstracts", + "paragraphs and abstracts" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 13968965538538956038, + 5696982554616545212, + null, + null, + 344, + 354, + 344, + 354, + 57, + 58, + true, + "paragraphs", + "paragraphs" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 5950055304304346669, + 13420779396952801457, + null, + null, + 359, + 368, + 359, + 368, + 59, + 60, + true, + "abstracts", + "abstracts" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 6167933651658664291, + 12910647832495839984, + null, + null, + 378, + 387, + 378, + 387, + 62, + 63, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 16381206562408205435, + 10625918246812111142, + null, + null, + 395, + 401, + 395, + 401, + 65, + 66, + true, + "corpus", + "corpus" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 15389240612279378533, + 10613103374378756986, + null, + null, + 435, + 445, + 435, + 445, + 72, + 73, + true, + "aggregates", + "aggregates" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 14652256560445338257, + 12562074549158643256, + null, + null, + 450, + 458, + 450, + 458, + 74, + 75, + true, + "entities", + "entities" + ], + [ + "sentence", + "proper", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 8672431139074644597, + 15077769571580712776, + null, + null, + 460, + 614, + 460, + 614, + 76, + 104, + true, + "Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components.", + "Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components." + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 16789581223630763462, + 11144393400275200701, + null, + null, + 470, + 486, + 470, + 486, + 79, + 81, + true, + "extraction tasks", + "extraction tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 6167933651658664291, + 12910647832496717158, + null, + null, + 517, + 526, + 517, + 526, + 86, + 87, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 2926971850916888288, + 11862075283092330987, + null, + null, + 541, + 557, + 541, + 557, + 92, + 94, + true, + "annotation tasks", + "annotation tasks" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 18223316012831076048, + 9614179813513706564, + null, + null, + 575, + 586, + 575, + 586, + 98, + 99, + true, + "parallelism", + "parallelism" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 17524405716142769441, + 2707237517370053311, + null, + null, + 594, + 613, + 594, + 613, + 101, + 103, + true, + "document components", + "document components" + ], + [ + "sentence", + "proper", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 13667161688560255794, + 11144968750389162501, + null, + null, + 615, + 693, + 615, + 693, + 104, + 117, + true, + "Any synchronization points thus can be pushed back into the aggregation tasks.", + "Any synchronization points thus can be pushed back into the aggregation tasks." + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 4432720038651401912, + 7824075473089442522, + null, + null, + 619, + 641, + 619, + 641, + 105, + 107, + true, + "synchronization points", + "synchronization points" + ], + [ + "term", + "single-term", + 997682002692959482, + "TEXT", + "#/texts/68", + 1.0, + 12420511630999364190, + 12482143430448245195, + null, + null, + 675, + 692, + 675, + 692, + 114, + 116, + true, + "aggregation tasks", + "aggregation tasks" + ], + [ + "numval", + "ival", + 11590138063543342276, + "TEXT", + "#/texts/69", + 1.0, + 17767354399704235163, + 13032776934094914368, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "3", + "3" + ], + [ + "sentence", + "improper", + 11590138063543342276, + "TEXT", + "#/texts/69", + 1.0, + 4476259065516669581, + 8481893244467504716, + null, + null, + 2, + 48, + 2, + 48, + 1, + 8, + true, + "| DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", + "| DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS" + ], + [ + "term", + "single-term", + 11590138063543342276, + "TEXT", + "#/texts/69", + 1.0, + 14675774684852204629, + 10752707062932991144, + null, + null, + 4, + 48, + 4, + 48, + 2, + 8, + true, + "DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", + "DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS" + ], + [ + "sentence", + "proper", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 15139433187664310492, + 4695469573464464784, + null, + null, + 0, + 103, + 0, + 103, + 0, + 18, + true, + "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph.", + "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph." + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 13240311013633905449, + 17335418227251459731, + null, + null, + 26, + 38, + 26, + 38, + 6, + 7, + true, + "requirements", + "requirements" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 13671659409933113155, + 4173424983166285630, + null, + null, + 50, + 71, + 50, + 71, + 9, + 12, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 4605433253513798881, + 12143484967953891920, + null, + null, + 77, + 102, + 77, + 102, + 14, + 17, + true, + "populated Knowledge Graph", + "populated Knowledge Graph" + ], + [ + "sentence", + "improper", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 2801533934034477883, + 18076787076612138377, + null, + null, + 104, + 166, + 104, + 166, + 18, + 27, + true, + "A deep data exploration requires two fundamental capabilities:", + "A deep data exploration requires two fundamental capabilities:" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 13671659409933113155, + 4173424983166279992, + null, + null, + 106, + 127, + 106, + 127, + 19, + 22, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 16380310806374538602, + "TEXT", + "#/texts/70", + 1.0, + 16355783708075937518, + 758382672449514167, + null, + null, + 141, + 165, + 141, + 165, + 24, + 26, + true, + "fundamental capabilities", + "fundamental capabilities" + ], + [ + "numval", + "ival", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 17767354399704235161, + 14832870493709788748, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "sentence", + "improper", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 4806888013365821011, + 14967712159992384847, + null, + null, + 1, + 92, + 1, + 92, + 1, + 20, + true, + ". perform deep queries on the graph, that is, queries that require multi-hop traversals and", + ". perform deep queries on the graph, that is, queries that require multi-hop traversals and" + ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 7076268937724050913, + 757920359996890916, + null, + null, + 11, + 23, + 11, + 23, + 3, + 5, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 329104159211247965, + 15479909791715347705, + null, + null, + 31, + 36, + 31, + 36, + 7, + 8, + true, + "graph", + "graph" + ], + [ + "expression", + "word-concatenation", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 6180052837118668048, + 5133447445234002483, + null, + null, + 68, + 77, + 68, + 77, + 15, + 18, + true, + "multi-hop", + "multi-hop" + ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 329104161505536647, + 13265400285065104050, + null, + null, + 68, + 73, + 68, + 73, + 15, + 16, + true, + "multi", + "multi" + ], + [ + "term", + "single-term", + 5393976293631695754, + "TEXT", + "#/texts/71", + 1.0, + 10552270358215062354, + 15484484601665128769, + null, + null, + 74, + 88, + 74, + 88, + 17, + 19, + true, + "hop traversals", + "hop traversals" + ], + [ + "numval", + "ival", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 17767354399704235162, + 6940844591694806953, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "sentence", + "improper", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 11193184128140813540, + 1283446586766936557, + null, + null, + 1, + 61, + 1, + 61, + 1, + 13, + true, + ". perform graph analytics on the full graph or subsets of it", + ". perform graph analytics on the full graph or subsets of it" + ], + [ + "term", + "single-term", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 4237976234056442894, + 17998220857740869205, + null, + null, + 11, + 26, + 11, + 26, + 3, + 5, + true, + "graph analytics", + "graph analytics" + ], + [ + "term", + "single-term", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 16104705485729814979, + 4683468690442044165, + null, + null, + 34, + 44, + 34, + 44, + 7, + 9, + true, + "full graph", + "full graph" + ], + [ + "term", + "single-term", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 8106478685921145000, + 17803707003211492579, + null, + null, + 48, + 55, + 48, + 55, + 10, + 11, + true, + "subsets", + "subsets" + ], + [ + "sentence", + "proper", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 9325671930388359069, + 16472228699755687388, + null, + null, + 62, + 73, + 62, + 73, + 13, + 19, + true, + "on-the-fly.", + "on-the-fly." + ], + [ + "expression", + "word-concatenation", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 14042859618039714361, + 17116381936376022837, + null, + null, + 62, + 72, + 62, + 72, + 13, + 18, + true, + "on-the-fly", + "on-the-fly" + ], + [ + "term", + "single-term", + 1988335831916069382, + "TEXT", + "#/texts/72", + 1.0, + 12178341415895627678, + 3945737302772263866, + null, + null, + 69, + 72, + 69, + 72, + 17, + 18, + true, + "fly", + "fly" + ], + [ + "sentence", + "proper", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 11326071441728208854, + 9963382484873233605, + null, + null, + 0, + 104, + 0, + 104, + 0, + 16, + true, + "Deep queries are essential to dynamically combine independent facts together in the given query context.", + "Deep queries are essential to dynamically combine independent facts together in the given query context." + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 11805369560476404678, + 15092745044709578481, + null, + null, + 0, + 12, + 0, + 12, + 0, + 2, + true, + "Deep queries", + "Deep queries" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 10975373624156076918, + 11938916105068148807, + null, + null, + 50, + 67, + 50, + 67, + 7, + 9, + true, + "independent facts", + "independent facts" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 16172198469897816706, + 17761989334252452030, + null, + null, + 90, + 103, + 90, + 103, + 13, + 15, + true, + "query context", + "query context" + ], + [ + "sentence", + "proper", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 10328879830457680735, + 16040219150643269355, + null, + null, + 105, + 254, + 105, + 254, + 16, + 43, + true, + "This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5).", + "This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5)." + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 8106397496085150773, + 4162592152493996363, + null, + null, + 126, + 133, + 126, + 133, + 20, + 21, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 13481069231801630849, + 12775598423777163361, + null, + null, + 137, + 158, + 137, + 158, + 22, + 24, + true, + "explorational queries", + "explorational queries" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 2735423192832266389, + 2837241149989494242, + null, + null, + 181, + 206, + 181, + 206, + 27, + 30, + true, + "petroleum system elements", + "petroleum system elements" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 2873469788203493819, + 10206430255073580142, + null, + null, + 227, + 237, + 227, + 237, + 35, + 37, + true, + "case study", + "case study" + ], + [ + "parenthesis", + "round brackets", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 13881206615676963801, + 14805625262429425181, + null, + null, + 238, + 253, + 238, + 253, + 37, + 42, + true, + "(see section 5)", + "(see section 5)" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 8106478708629288965, + 4163190785571344199, + null, + null, + 243, + 250, + 243, + 250, + 39, + 40, + true, + "section", + "section" + ], + [ + "numval", + "ival", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 17767354399704235157, + 8804028754113186404, + null, + null, + 251, + 252, + 251, + 252, + 40, + 41, + true, + "5", + "5" + ], + [ + "sentence", + "proper", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 10167104780571413476, + 11008366146913000479, + null, + null, + 255, + 326, + 255, + 326, + 43, + 55, + true, + "Graph analytics can further reveal hidden structure in the KG topology.", + "Graph analytics can further reveal hidden structure in the KG topology." + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 968326748584202361, + 1506605499402907857, + null, + null, + 255, + 270, + 255, + 270, + 43, + 45, + true, + "Graph analytics", + "Graph analytics" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 8767693934171074605, + 9137059685047075674, + null, + null, + 290, + 306, + 290, + 306, + 48, + 50, + true, + "hidden structure", + "hidden structure" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 6992599754813268015, + 399356179529846231, + null, + null, + 314, + 325, + 314, + 325, + 52, + 54, + true, + "KG topology", + "KG topology" + ], + [ + "sentence", + "proper", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 11539294678044249131, + 18223140021759824115, + null, + null, + 327, + 472, + 327, + 472, + 55, + 80, + true, + "Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", + "Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation." + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 14650277098690689540, + 7868203806272457, + null, + null, + 327, + 335, + 327, + 335, + 55, + 56, + true, + "Examples", + "Examples" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 3093669945723512593, + 14416489053928682644, + null, + null, + 339, + 374, + 339, + 374, + 57, + 60, + true, + "advanced graphanalytical operations", + "advanced graphanalytical operations" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 6184977920700221726, + 16322060306778153855, + null, + null, + 379, + 388, + 379, + 388, + 61, + 63, + true, + "page rank", + "page rank" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 4977542118862070209, + 8478594702751520278, + null, + null, + 390, + 407, + 390, + 407, + 64, + 66, + true, + "node centralities", + "node centralities" + ], + [ + "numval", + "fval", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 389609625533812191, + 1960551977415557980, + null, + null, + 409, + 413, + 409, + 413, + 67, + 70, + true, + "9,10", + "9,10" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 9441128013663076980, + 4724848430636712553, + null, + null, + 414, + 429, + 414, + 429, + 70, + 72, + true, + "node clustering", + "node clustering" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 9079575722732701095, + 9418445845300672534, + null, + null, + 431, + 448, + 431, + 448, + 73, + 75, + true, + "spectral analysis", + "spectral analysis" + ], + [ + "term", + "enum-term-mark-2", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 14039610523492290379, + 8337307582732886868, + null, + null, + 440, + 471, + 440, + 471, + 74, + 79, + true, + "analysis, and label propagation", + "analysis, and label propagation" + ], + [ + "term", + "single-term", + 5147764798816678886, + "TEXT", + "#/texts/73", + 1.0, + 4996066085078527360, + 7290288987028820693, + null, + null, + 454, + 471, + 454, + 471, + 77, + 79, + true, + "label propagation", + "label propagation" + ], + [ + "sentence", + "proper", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 3847089632682750791, + 9531589699293554617, + null, + null, + 0, + 231, + 0, + 231, + 0, + 42, + true, + "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth.", + "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth." + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 7076268937724050913, + 5590677365945223320, + null, + null, + 5, + 17, + 5, + 17, + 1, + 3, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 4237976234056442894, + 18290860271812073513, + null, + null, + 22, + 37, + 22, + 37, + 4, + 6, + true, + "graph analytics", + "graph analytics" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 9197647117066059649, + 7265372759594219155, + null, + null, + 102, + 130, + 102, + 130, + 17, + 20, + true, + "conventional graph databases", + "conventional graph databases" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 12732351021804658370, + 10575701010966589198, + null, + null, + 141, + 156, + 141, + 156, + 24, + 26, + true, + "rapid expansion", + "rapid expansion" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 16381206574973295053, + 13593287647362680849, + null, + null, + 164, + 170, + 164, + 170, + 28, + 29, + true, + "number", + "number" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 10847427237839014643, + 6591514626586371035, + null, + null, + 174, + 187, + 174, + 187, + 30, + 32, + true, + "visited nodes", + "visited nodes" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 14637915316557309079, + 4828060395391576425, + null, + null, + 193, + 201, + 193, + 201, + 34, + 35, + true, + "function", + "function" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 8759553427650775934, + 8064528190798949556, + null, + null, + 209, + 224, + 209, + 224, + 37, + 40, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 329104159211247965, + 7003711144621466114, + null, + null, + 209, + 214, + 209, + 214, + 37, + 38, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 2922544134183826575, + 10546406182885758211, + null, + null, + 215, + 230, + 215, + 230, + 39, + 41, + true, + "traversal depth", + "traversal depth" + ], + [ + "sentence", + "proper", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 12949087731978363617, + 17941143682626011533, + null, + null, + 232, + 326, + 232, + 326, + 42, + 60, + true, + "This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases.", + "This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases." + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 16419030849690516097, + 3943363386701357449, + null, + null, + 242, + 256, + 242, + 256, + 45, + 47, + true, + "major obstacle", + "major obstacle" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 7407757769083952026, + 8374588439870652293, + null, + null, + 270, + 285, + 270, + 285, + 49, + 51, + true, + "reasonable time", + "reasonable time" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 6285955549867796622, + 3357902448538227798, + null, + null, + 281, + 297, + 281, + 297, + 50, + 55, + true, + "time-to-solution", + "time-to-solution" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 14635106751859230946, + 1754333906893193790, + null, + null, + 289, + 297, + 289, + 297, + 54, + 55, + true, + "solution", + "solution" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 7636868254411294311, + 5058362325883158613, + null, + null, + 305, + 325, + 305, + 325, + 57, + 59, + true, + "aforementioned cases", + "aforementioned cases" + ], + [ + "sentence", + "proper", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5048161526303226736, + 8851548305893694604, + null, + null, + 327, + 462, + 327, + 462, + 60, + 84, + true, + "Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources.", + "Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources." + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5347546348172783098, + 11784313264771460442, + null, + null, + 353, + 376, + 353, + 376, + 63, + 66, + true, + "graph database products", + "graph database products" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 9507265079216451910, + 8216015788353070056, + null, + null, + 384, + 396, + 384, + 396, + 68, + 70, + true, + "market today", + "market today" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 16381206566370240312, + 12606485506230202974, + null, + null, + 405, + 411, + 405, + 411, + 72, + 73, + true, + "victim", + "victim" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 2183265725869201141, + 4685897896439150964, + null, + null, + 445, + 461, + 445, + 461, + 81, + 83, + true, + "multiple sources", + "multiple sources" + ], + [ + "numval", + "fval", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 329104147749426795, + 8846331132305971160, + null, + null, + 463, + 468, + 463, + 468, + 84, + 87, + true, + "11,12", + "11,12" + ], + [ + "sentence", + "proper", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 6385932786136317833, + 1838392916180151399, + null, + null, + 469, + 594, + 469, + 594, + 87, + 110, + true, + "Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform.", + "Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform." + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5254873347880861734, + 17629927352964185068, + null, + null, + 480, + 496, + 480, + 496, + 90, + 92, + true, + "poor performance", + "poor performance" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 3613728750551607700, + 11957105209137106607, + null, + null, + 514, + 539, + 514, + 539, + 95, + 98, + true, + "available graph databases", + "available graph databases" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 15602286436515055909, + 6910187018044769346, + null, + null, + 556, + 572, + 556, + 572, + 102, + 105, + true, + "new graph engine", + "new graph engine" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 12779036928191531604, + 9018866942868051774, + null, + null, + 581, + 593, + 581, + 593, + 107, + 109, + true, + "CPS platform", + "CPS platform" + ], + [ + "sentence", + "proper", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 905311317995826078, + 15561031818904732595, + null, + null, + 595, + 761, + 595, + 761, + 110, + 145, + true, + "This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", + "This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast." + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 2924972194163802578, + 14724717228620861174, + null, + null, + 600, + 612, + 600, + 612, + 111, + 113, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 13549848866802287556, + 8151422605240501484, + null, + null, + 632, + 646, + 632, + 646, + 117, + 119, + true, + "advanced graph", + "advanced graph" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 18014693639644065312, + 3586133881645139780, + null, + null, + 641, + 656, + 641, + 656, + 118, + 121, + true, + "graph-analytics", + "graph-analytics" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 5946726878008170720, + 7296653544607949572, + null, + null, + 647, + 656, + 647, + 656, + 120, + 121, + true, + "analytics", + "analytics" + ], + [ + "numval", + "ival", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 17767354399704235162, + 13308611356903088115, + null, + null, + 657, + 658, + 657, + 658, + 121, + 122, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 18215467199078216516, + 4540466417441672306, + null, + null, + 670, + 691, + 670, + 691, + 125, + 128, + true, + "evaluate deep queries", + "evaluate deep queries" + ], + [ + "expression", + "word-concatenation", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 6180052837118668048, + 18178055246764646091, + null, + null, + 697, + 706, + 697, + 706, + 129, + 132, + true, + "multi-hop", + "multi-hop" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 329104161505536647, + 7399874972948232234, + null, + null, + 697, + 702, + 697, + 702, + 129, + 130, + true, + "multi", + "multi" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 10552270358215062354, + 4544438113121545290, + null, + null, + 703, + 717, + 703, + 717, + 131, + 133, + true, + "hop traversals", + "hop traversals" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 13692197547477852594, + 14292005679357148705, + null, + null, + 721, + 733, + 721, + 733, + 134, + 136, + true, + "large graphs", + "large graphs" + ], + [ + "parenthesis", + "round brackets", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 3451858675896043664, + 6364806088702961820, + null, + null, + 734, + 745, + 734, + 745, + 136, + 142, + true, + "(>1B edges)", + "(>1B edges)" + ], + [ + "numval", + "ival", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 17767354399704235161, + 13308611356852178135, + null, + null, + 736, + 737, + 736, + 737, + 138, + 139, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 285583876932865368, + "TEXT", + "#/texts/74", + 1.0, + 8106350691486682096, + 14145095568144885133, + null, + null, + 737, + 744, + 737, + 744, + 139, + 141, + true, + "B edges", + "B edges" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/75", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 4361549257370278754, + "TEXT", + "#/texts/76", + 1.0, + 17767354399704235159, + 18348318207235940730, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "7", + "7" + ], + [ + "sentence", + "improper", + 4361549257370278754, + "TEXT", + "#/texts/76", + 1.0, + 15441160910541485670, + 2772137128821491569, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], + [ + "numval", + "ival", + 4361549257370278754, + "TEXT", + "#/texts/76", + 1.0, + 15441160910541481979, + 2772124700731079428, + null, + null, + 3, + 5, + 3, + 5, + 2, + 3, + true, + "15", + "15" + ], + [ + "sentence", + "proper", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 15169275422072685506, + 8924490843340168699, + null, + null, + 0, + 88, + 0, + 88, + 0, + 17, + true, + "In the remaining part of this section, we elaborate on our newly developed graph engine.", + "In the remaining part of this section, we elaborate on our newly developed graph engine." + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 389609625632304952, + 17729073815639454901, + null, + null, + 17, + 21, + 17, + 21, + 3, + 4, + true, + "part", + "part" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751620002, + null, + null, + 30, + 37, + 30, + 37, + 6, + 7, + true, + "section", + "section" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 2924972194163802578, + 1343185001122892048, + null, + null, + 75, + 87, + 75, + 87, + 14, + 16, + true, + "graph engine", + "graph engine" + ], + [ + "sentence", + "proper", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 10870737769007965775, + 8022598482893376902, + null, + null, + 89, + 142, + 89, + 142, + 17, + 29, + true, + "In section 3.1, we discuss the implementation design.", + "In section 3.1, we discuss the implementation design." + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751616082, + null, + null, + 92, + 99, + 92, + 99, + 18, + 19, + true, + "section", + "section" + ], + [ + "numval", + "fval", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 12178341415896435198, + 14026574630810798704, + null, + null, + 100, + 103, + 100, + 103, + 19, + 22, + true, + "3.1", + "3.1" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 5689391492622578219, + 4178339675981596420, + null, + null, + 120, + 141, + 120, + 141, + 26, + 28, + true, + "implementation design", + "implementation design" + ], + [ + "sentence", + "proper", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 16126804403293380759, + 942571218184287509, + null, + null, + 143, + 324, + 143, + 324, + 29, + 71, + true, + "In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", + "In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine." + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751613295, + null, + null, + 146, + 153, + 146, + 153, + 30, + 31, + true, + "section", + "section" + ], + [ + "numval", + "fval", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 12178341415896435199, + 14026574630786503486, + null, + null, + 154, + 157, + 154, + 157, + 31, + 34, + true, + "3.2", + "3.2" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 7309351122725453953, + 1424245629440322320, + null, + null, + 170, + 189, + 170, + 189, + 37, + 39, + true, + "performance results", + "performance results" + ], + [ + "expression", + "wtoken-concatenation", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 329104162105779366, + 17726293975143479221, + null, + null, + 208, + 213, + 208, + 213, + 43, + 46, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 12178341415896300292, + 14026349734474525868, + null, + null, + 208, + 211, + 208, + 211, + 43, + 44, + true, + "Neo", + "Neo" + ], + [ + "numval", + "ival", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 17767354399704235156, + 5196757730407799108, + null, + null, + 211, + 212, + 211, + 212, + 44, + 45, + true, + "4", + "4" + ], + [ + "name", + "person-name", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106351243953564135, + 5354954892260486825, + null, + null, + 212, + 220, + 212, + 220, + 45, + 48, + true, + "J Later", + "J. Later" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 8106478708629288965, + 9706904241751509683, + null, + null, + 225, + 232, + 225, + 232, + 50, + 51, + true, + "section", + "section" + ], + [ + "numval", + "fval", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 12178341415896435196, + 14026574630635842602, + null, + null, + 233, + 236, + 233, + 236, + 51, + 54, + true, + "3.3", + "3.3" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 7076268937724050913, + 17647570497214443245, + null, + null, + 262, + 274, + 262, + 274, + 60, + 62, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 13183039880198077038, + "TEXT", + "#/texts/77", + 1.0, + 2924972194163802578, + 1343185001122940285, + null, + null, + 311, + 323, + 311, + 323, + 68, + 70, + true, + "graph engine", + "graph engine" + ], + [ + "numval", + "fval", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 12178341415896435198, + 3629736405801839701, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "3.1", + "3.1" + ], + [ + "sentence", + "improper", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 14601374641425905440, + 4335396186787100417, + null, + null, + 4, + 32, + 4, + 32, + 3, + 9, + true, + "| Design of the graph engine", + "| Design of the graph engine" + ], + [ + "term", + "single-term", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 16381206533755764332, + 8796133857281518442, + null, + null, + 6, + 12, + 6, + 12, + 4, + 5, + true, + "Design", + "Design" + ], + [ + "term", + "single-term", + 13428900458866068249, + "TEXT", + "#/texts/78", + 1.0, + 2924972194163802578, + 1091696909158573450, + null, + null, + 20, + 32, + 20, + 32, + 7, + 9, + true, + "graph engine", + "graph engine" + ], + [ + "sentence", + "proper", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 11829310439673149559, + 1096556366946463125, + null, + null, + 0, + 147, + 0, + 147, + 0, + 24, + true, + "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices.", + "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices." + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 4736549060189165039, + 5665060641995713376, + null, + null, + 3, + 19, + 3, + 19, + 1, + 3, + true, + "computer science", + "computer science" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 9799555633140790718, + 11746588474588557255, + null, + null, + 25, + 57, + 25, + 57, + 5, + 8, + true, + "prevalent implementation schemes", + "prevalent implementation schemes" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16381206539879417749, + 6940358010355959125, + null, + null, + 62, + 68, + 62, + 68, + 9, + 10, + true, + "graphs", + "graphs" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 3120046212755594191, + 15111940143832491527, + null, + null, + 93, + 108, + 93, + 108, + 15, + 17, + true, + "adjacency lists", + "adjacency lists" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16579929503880818246, + 9745979266607673578, + null, + null, + 128, + 146, + 128, + 146, + 21, + 23, + true, + "adjacency matrices", + "adjacency matrices" + ], + [ + "numval", + "fval", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104147748831777, + 2423845697831217766, + null, + null, + 148, + 153, + 148, + 153, + 24, + 27, + true, + "13,14", + "13,14" + ], + [ + "sentence", + "proper", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 6164571272575257207, + 6881600727512470107, + null, + null, + 154, + 279, + 154, + 279, + 27, + 49, + true, + "In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors.", + "In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors." + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 8120096385401382748, + 6847735266077093, + null, + null, + 161, + 182, + 161, + 182, + 29, + 32, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 389609625621164460, + 15514573413788730542, + null, + null, + 190, + 194, + 190, + 194, + 34, + 35, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16381206566431505764, + 3115955576590759354, + null, + null, + 213, + 219, + 213, + 219, + 38, + 39, + true, + "object", + "object" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 12178341415895638602, + 7154652167856968436, + null, + null, + 237, + 240, + 237, + 240, + 42, + 43, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 8106398345633211267, + 17718037347447073081, + null, + null, + 244, + 251, + 244, + 251, + 44, + 45, + true, + "indices", + "indices" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 6169326768163434458, + 7636521745233070077, + null, + null, + 269, + 278, + 269, + 278, + 47, + 48, + true, + "neighbors", + "neighbors" + ], + [ + "sentence", + "improper", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 15441160910540903299, + 501565812163902881, + null, + null, + 280, + 286, + 280, + 282, + 49, + 50, + true, + "\u2020\u2020", + "\u2020\u2020" + ], + [ + "sentence", + "proper", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 1300122478685074787, + 11675804466477020290, + null, + null, + 287, + 344, + 283, + 340, + 50, + 62, + true, + "The edges are therefore stored as a property of the node.", + "The edges are therefore stored as a property of the node." + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104162186494203, + 7406953761343574094, + null, + null, + 291, + 296, + 287, + 292, + 51, + 52, + true, + "edges", + "edges" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 14814125841683215315, + 12241347891873585821, + null, + null, + 323, + 331, + 319, + 327, + 57, + 58, + true, + "property", + "property" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 389609625621164460, + 15514573413788712138, + null, + null, + 339, + 343, + 335, + 339, + 60, + 61, + true, + "node", + "node" + ], + [ + "sentence", + "proper", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 17653262178725528111, + 14906657644492502176, + null, + null, + 345, + 502, + 341, + 498, + 62, + 91, + true, + "In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", + "In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples." + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 6848778759897299700, + 6443024831232567950, + null, + null, + 352, + 377, + 348, + 373, + 64, + 67, + true, + "adjacency matrix approach", + "adjacency matrix approach" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104161758737773, + 7410031061966183022, + null, + null, + 383, + 388, + 379, + 384, + 69, + 70, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 15995920061809434509, + 1660220959282764167, + null, + null, + 399, + 409, + 395, + 405, + 72, + 73, + true, + "identifier", + "identifier" + ], + [ + "parenthesis", + "round brackets", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 10362086140065524622, + 13319597512915342985, + null, + null, + 410, + 441, + 406, + 437, + 73, + 79, + true, + "(typically an unsigned integer)", + "(typically an unsigned integer)" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 11971109509670241646, + 13587803609989610789, + null, + null, + 424, + 440, + 420, + 436, + 76, + 78, + true, + "unsigned integer", + "unsigned integer" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 329104162186494203, + 7406953761389627803, + null, + null, + 450, + 455, + 446, + 451, + 81, + 82, + true, + "edges", + "edges" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 389609625633315922, + 15508611803302681211, + null, + null, + 472, + 476, + 468, + 472, + 86, + 87, + true, + "list", + "list" + ], + [ + "term", + "single-term", + 1430911655724119030, + "TEXT", + "#/texts/79", + 1.0, + 16381206516227726330, + 7881028199436551027, + null, + null, + 495, + 501, + 491, + 497, + 89, + 90, + true, + "tuples", + "tuples" + ], + [ + "sentence", + "proper", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 14752907807764014625, + 7112639404154086708, + null, + null, + 0, + 110, + 0, + 110, + 0, + 19, + true, + "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra.", + "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra." + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 20806960854514546, + 8439122538985534303, + null, + null, + 26, + 47, + 26, + 47, + 5, + 8, + true, + "most graph operations", + "most graph operations" + ], + [ + "expression", + "word-concatenation", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 12320233288833810715, + 13142345776581091864, + null, + null, + 71, + 88, + 71, + 88, + 12, + 15, + true, + "matrix-operations", + "matrix-operations" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 16381206594266103973, + 1605408288839874347, + null, + null, + 71, + 77, + 71, + 77, + 12, + 13, + true, + "matrix", + "matrix" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 13985988710970420061, + 15670955031366468010, + null, + null, + 78, + 88, + 78, + 88, + 14, + 15, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 11590149467838756247, + 4637729301611600179, + null, + null, + 95, + 109, + 95, + 109, + 16, + 18, + true, + "linear algebra", + "linear algebra" + ], + [ + "numval", + "ival", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 15441160910541481977, + 17073318187218057934, + null, + null, + 111, + 113, + 111, + 113, + 19, + 20, + true, + "13", + "13" + ], + [ + "sentence", + "proper", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 7006429886037621508, + 125144349920919638, + null, + null, + 114, + 159, + 114, + 159, + 20, + 30, + true, + "For example, consider the graph-traversal V !", + "For example, consider the graph-traversal V !" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106397496085150773, + 13505373486844891217, + null, + null, + 118, + 125, + 118, + 125, + 21, + 22, + true, + "example", + "example" + ], + [ + "expression", + "word-concatenation", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8759553427650775934, + 3946005403065402451, + null, + null, + 140, + 155, + 140, + 155, + 25, + 28, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 329104159211247965, + 3322884300217806445, + null, + null, + 140, + 145, + 140, + 145, + 25, + 26, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 12468293841864465959, + 3129159938478991388, + null, + null, + 146, + 157, + 146, + 157, + 27, + 29, + true, + "traversal V", + "traversal V" + ], + [ + "sentence", + "proper", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 2226059187471967763, + 8569977295461945765, + null, + null, + 160, + 269, + 160, + 269, + 30, + 59, + true, + "A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W.", + "A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W." + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 12178341415895638602, + 6908352950519464398, + null, + null, + 190, + 193, + 190, + 193, + 39, + 40, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106352617178756957, + 5831781823529690849, + null, + null, + 197, + 204, + 197, + 204, + 41, + 43, + true, + "nodes V", + "nodes V" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 389609625699630670, + 16553309401039496143, + null, + null, + 222, + 226, + 222, + 226, + 46, + 47, + true, + "edge", + "edge" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 329104161571401725, + 2466996076977359002, + null, + null, + 232, + 237, + 232, + 237, + 49, + 50, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106342689900874417, + 3060939093264365538, + null, + null, + 250, + 257, + 250, + 257, + 53, + 55, + true, + "new set", + "new set" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 8106352617178756958, + 5831781823513588259, + null, + null, + 261, + 268, + 261, + 268, + 56, + 58, + true, + "nodes W", + "nodes W" + ], + [ + "sentence", + "improper", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 17852491679655048183, + 3567971614464094913, + null, + null, + 270, + 324, + 270, + 324, + 59, + 68, + true, + "This can be directly translated into linear algebra as", + "This can be directly translated into linear algebra as" + ], + [ + "term", + "single-term", + 13770706479324480755, + "TEXT", + "#/texts/80", + 1.0, + 11590149467838756247, + 4637729301611654414, + null, + null, + 307, + 321, + 307, + 321, + 65, + 67, + true, + "linear algebra", + "linear algebra" + ], + [ + "sentence", + "improper", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235223, + 16151623699919035273, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "w", + "w" + ], + [ + "sentence", + "proper", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 4763538990111593484, + 7324065661356045772, + null, + null, + 2, + 14, + 2, + 14, + 1, + 10, + true, + "$^{!}$= Av !", + "$^{!}$= Av !" + ], + [ + "expression", + "wtoken-concatenation", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 329104159258693175, + 15542061184572896869, + null, + null, + 2, + 9, + 2, + 9, + 1, + 8, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "term", + "single-term", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 15441160910541480528, + 6320979252162665530, + null, + null, + 10, + 12, + 10, + 12, + 8, + 9, + true, + "Av", + "Av" + ], + [ + "sentence", + "improper", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 15705096823684294974, + 17789840971827485592, + null, + null, + 15, + 91, + 15, + 89, + 10, + 47, + true, + "with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", + "with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de" + ], + [ + "expression", + "wtoken-concatenation", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 5948620232447446819, + 6902872370209677045, + null, + null, + 22, + 35, + 22, + 35, + 12, + 25, + true, + "^{!}_{i}=", + "$^{!}$$_{i}$=" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235161, + 16151623650567223960, + null, + null, + 36, + 37, + 36, + 37, + 25, + 26, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235160, + 16151623650470238720, + null, + null, + 53, + 54, + 53, + 54, + 31, + 32, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 17767354399704235162, + 16151623650448785184, + null, + null, + 67, + 68, + 67, + 68, + 36, + 37, + true, + "2", + "2" + ], + [ + "expression", + "wtoken-concatenation", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 7116489890516680880, + 17126918343435525249, + null, + null, + 73, + 83, + 73, + 83, + 39, + 44, + true, + "GLYPH", + "GLYPH" + ], + [ + "term", + "single-term", + 11165481757050847950, + "TEXT", + "#/texts/81", + 1.0, + 8106479216955883814, + 17204726552770910482, + null, + null, + 73, + 80, + 73, + 80, + 39, + 42, + true, + "GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", + "GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 3078199901586211847, + 17286928490334208994, + null, + null, + 39, + 59, + 37, + 57, + 27, + 37, + true, + "GLYPHGLYPH", + "GLYPHGLYPH" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481860, + 1648917876881521913, + null, + null, + 46, + 48, + 44, + 46, + 30, + 31, + true, + "16", + "16" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481861, + 1648917874734449247, + null, + null, + 56, + 58, + 54, + 56, + 35, + 36, + true, + "17", + "17" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516676705, + 2094556379511614885, + null, + null, + 60, + 70, + 58, + 68, + 37, + 42, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481860, + 1648917876881497232, + null, + null, + 67, + 69, + 65, + 67, + 40, + 41, + true, + "16", + "16" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516677026, + 2094556361631692442, + null, + null, + 71, + 81, + 69, + 79, + 42, + 47, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481861, + 1648917874734455276, + null, + null, + 78, + 80, + 76, + 78, + 45, + 46, + true, + "17", + "17" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516676705, + 2094556379511615262, + null, + null, + 82, + 92, + 80, + 90, + 47, + 52, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481860, + 1648917876881494293, + null, + null, + 89, + 91, + 87, + 89, + 50, + 51, + true, + "16", + "16" + ], + [ + "expression", + "wtoken-concatenation", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 7116489890516677026, + 2094556361631691128, + null, + null, + 93, + 103, + 91, + 101, + 52, + 57, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 15441160910541481861, + 1648917874734450570, + null, + null, + 100, + 102, + 98, + 100, + 55, + 56, + true, + "17", + "17" + ], + [ + "numval", + "ival", + 14951391138799557075, + "TEXT", + "#/texts/83", + 1.0, + 17767354399704235162, + 10344599291481597093, + null, + null, + 109, + 110, + 106, + 107, + 59, + 60, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 5775536069845081632, + 13168819924872490931, + null, + null, + 0, + 108, + 0, + 108, + 0, + 20, + true, + "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently.", + "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently." + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 7076268937724050913, + 16904614497174539564, + null, + null, + 11, + 23, + 11, + 23, + 2, + 4, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654176293, + null, + null, + 66, + 74, + 66, + 74, + 11, + 12, + true, + "Equation", + "Equation" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487463376, + null, + null, + 75, + 78, + 75, + 78, + 12, + 15, + true, + "(1)", + "(1)" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423630693, + null, + null, + 76, + 77, + 76, + 77, + 13, + 14, + true, + "1", + "1" + ], + [ + "sentence", + "proper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 18120357783789340975, + 10586026389391004299, + null, + null, + 109, + 293, + 109, + 293, + 20, + 53, + true, + "Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs.", + "Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs." + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16264934656264211635, + 4511209860311708157, + null, + null, + 118, + 130, + 118, + 130, + 22, + 24, + true, + "past decades", + "past decades" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625633531007, + 8072331032130786374, + null, + null, + 132, + 136, + 132, + 136, + 25, + 26, + true, + "lots", + "lots" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14634109233387695059, + 8230722811753480240, + null, + null, + 140, + 148, + 140, + 148, + 27, + 28, + true, + "research", + "research" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 9489326438360715521, + 16290129180775347077, + null, + null, + 175, + 211, + 175, + 211, + 33, + 37, + true, + "High Performance Computing community", + "High Performance Computing community" + ], + [ + "term", + "enum-term-mark-2", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 11536514477790993181, + 1172284701792082182, + null, + null, + 219, + 251, + 219, + 251, + 39, + 42, + true, + "acceleration and parallelization", + "acceleration and parallelization" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 7389039184143186430, + 4020754544621133412, + null, + null, + 219, + 231, + 219, + 231, + 39, + 40, + true, + "acceleration", + "acceleration" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4992630253996742003, + 9350224994567595782, + null, + null, + 236, + 251, + 236, + 251, + 41, + 42, + true, + "parallelization", + "parallelization" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654222038, + null, + null, + 255, + 263, + 255, + 263, + 43, + 44, + true, + "Equation", + "Equation" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487507881, + null, + null, + 264, + 267, + 264, + 267, + 44, + 47, + true, + "(1)", + "(1)" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423585534, + null, + null, + 265, + 266, + 265, + 266, + 45, + 46, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 8106398484416909789, + 6445237109781063638, + null, + null, + 275, + 282, + 275, + 282, + 49, + 50, + true, + "context", + "context" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16381206539879417749, + 14212076125635256438, + null, + null, + 286, + 292, + 286, + 292, + 51, + 52, + true, + "graphs", + "graphs" + ], + [ + "sentence", + "proper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14902507466498554187, + 4746952862969696045, + null, + null, + 294, + 502, + 294, + 502, + 53, + 93, + true, + "In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed.", + "In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed." + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 8106398484416909789, + 6445237109781064988, + null, + null, + 302, + 309, + 302, + 309, + 55, + 56, + true, + "context", + "context" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16381206594266103973, + 11886141757564266520, + null, + null, + 315, + 321, + 315, + 321, + 58, + 59, + true, + "matrix", + "matrix" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 11302648084487719921, + 15524726029551673233, + null, + null, + 342, + 358, + 342, + 358, + 64, + 66, + true, + "linear operation", + "linear operation" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654222955, + null, + null, + 362, + 370, + 362, + 370, + 67, + 68, + true, + "Equation", + "Equation" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487478174, + null, + null, + 371, + 374, + 371, + 374, + 68, + 71, + true, + "(1)", + "(1)" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423582023, + null, + null, + 372, + 373, + 372, + 373, + 69, + 70, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 15221837707742504836, + 13831002432772278526, + null, + null, + 395, + 430, + 395, + 430, + 76, + 80, + true, + "sparse matrix vector multiplication", + "sparse matrix vector multiplication" + ], + [ + "parenthesis", + "round brackets", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16380808301996821998, + 15698441970854417697, + null, + null, + 431, + 437, + 431, + 437, + 80, + 83, + true, + "(SpMV)", + "(SpMV)" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625540553279, + 8138653950944491239, + null, + null, + 432, + 436, + 432, + 436, + 81, + 82, + true, + "SpMV", + "SpMV" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 5211900619579820608, + 5678836820111735314, + null, + null, + 466, + 481, + 466, + 481, + 88, + 89, + true, + "implementations", + "implementations" + ], + [ + "numval", + "fval", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 329104147748297973, + 7115759532919018249, + null, + null, + 503, + 508, + 503, + 508, + 93, + 96, + true, + "15,16", + "15,16" + ], + [ + "sentence", + "proper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 972796118155588075, + 768003519665868807, + null, + null, + 509, + 600, + 509, + 600, + 96, + 111, + true, + "Notably, most advanced graph-analytical operations can be formulated using SpMV operations.", + "Notably, most advanced graph-analytical operations can be formulated using SpMV operations." + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 13549848866802287556, + 16102673275631331573, + null, + null, + 523, + 537, + 523, + 537, + 99, + 101, + true, + "advanced graph", + "advanced graph" + ], + [ + "expression", + "word-concatenation", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 369269646322723959, + 3452033464673941326, + null, + null, + 532, + 548, + 532, + 548, + 100, + 103, + true, + "graph-analytical", + "graph-analytical" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4327043859883556076, + 1627401650092296014, + null, + null, + 538, + 559, + 538, + 559, + 102, + 104, + true, + "analytical operations", + "analytical operations" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4304811534835870538, + 1394572827254744857, + null, + null, + 584, + 599, + 584, + 599, + 108, + 110, + true, + "SpMV operations", + "SpMV operations" + ], + [ + "sentence", + "proper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 9907555007325257702, + 14048835156430871966, + null, + null, + 601, + 731, + 601, + 731, + 111, + 137, + true, + "The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w !", + "The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w !" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 3064129899577325203, + 17940686330604280218, + null, + null, + 610, + 622, + 610, + 622, + 113, + 115, + true, + "trivial case", + "trivial case" + ], + [ + "expression", + "word-concatenation", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 6184977925120246739, + 3173903523454537724, + null, + null, + 626, + 635, + 626, + 635, + 116, + 119, + true, + "page-rank", + "page-rank" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625632301461, + 8073774284142458229, + null, + null, + 626, + 630, + 626, + 630, + 116, + 117, + true, + "page", + "page" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625632775501, + 8072379518130801385, + null, + null, + 631, + 635, + 631, + 635, + 118, + 119, + true, + "rank", + "rank" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 14650266729725885817, + 10065410986654148478, + null, + null, + 671, + 679, + 671, + 679, + 125, + 126, + true, + "Equation", + "Equation" + ], + [ + "parenthesis", + "reference", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12178341415896395122, + 15809942743487485425, + null, + null, + 680, + 683, + 680, + 683, + 126, + 129, + true, + "(1)", + "(1)" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235161, + 18186955703423684934, + null, + null, + 681, + 682, + 681, + 682, + 127, + 128, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 2989796650905950968, + 6786698869169380411, + null, + null, + 687, + 698, + 687, + 698, + 130, + 131, + true, + "combination", + "combination" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 18007575068397390542, + 14744607835693282246, + null, + null, + 706, + 721, + 706, + 721, + 133, + 134, + true, + "renormalization", + "renormalization" + ], + [ + "sentence", + "improper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12264509556052062994, + 5123928862892494153, + null, + null, + 732, + 745, + 732, + 745, + 137, + 141, + true, + "is equal to v", + "is equal to v" + ], + [ + "sentence", + "proper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 8106340349709889698, + 8414473377762557457, + null, + null, + 746, + 753, + 746, + 753, + 141, + 148, + true, + "$^{!}$.", + "$^{!}$." + ], + [ + "expression", + "latex", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 389609625699793568, + 8079745072465029660, + null, + null, + 746, + 752, + 746, + 752, + 141, + 147, + true, + "^{!}", + "$^{!}$" + ], + [ + "sentence", + "proper", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12894202728737595682, + 16352149120081517995, + null, + null, + 754, + 960, + 754, + 960, + 148, + 185, + true, + "In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", + "In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations." + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 12580512760652482076, + 13583953375349869788, + null, + null, + 761, + 774, + 761, + 774, + 150, + 152, + true, + "previous work", + "previous work" + ], + [ + "numval", + "ival", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 17767354399704235162, + 18186955704176922280, + null, + null, + 776, + 777, + 776, + 777, + 153, + 154, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 16381206568246674273, + 11429613659753629348, + null, + null, + 800, + 806, + 800, + 806, + 159, + 160, + true, + "detail", + "detail" + ], + [ + "expression", + "word-concatenation", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 369269646322723959, + 3452033464673955722, + null, + null, + 821, + 837, + 821, + 837, + 162, + 165, + true, + "graph-analytical", + "graph-analytical" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 329104159211247965, + 9136957105756069094, + null, + null, + 821, + 826, + 821, + 826, + 162, + 163, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4327043859883556076, + 1627401650092220054, + null, + null, + 827, + 848, + 827, + 848, + 164, + 166, + true, + "analytical operations", + "analytical operations" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 4977542118862070209, + 12350177894838464292, + null, + null, + 857, + 874, + 857, + 874, + 168, + 170, + true, + "node centralities", + "node centralities" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 9079575722732701095, + 13885675971977617571, + null, + null, + 879, + 896, + 879, + 896, + 171, + 173, + true, + "spectral analysis", + "spectral analysis" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 329104159211247965, + 9136957105756070646, + null, + null, + 904, + 909, + 904, + 909, + 175, + 176, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 16602156009514813718, + "TEXT", + "#/texts/84", + 1.0, + 10146385311913577445, + 3881716146591211694, + null, + null, + 939, + 959, + 939, + 959, + 181, + 184, + true, + "only SpMV operations", + "only SpMV operations" + ], + [ + "sentence", + "proper", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 13884895358995816532, + 10671792459590835108, + null, + null, + 0, + 204, + 0, + 204, + 0, + 37, + true, + "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", + "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format." + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 7076268937724050913, + 14136229895878741561, + null, + null, + 11, + 23, + 11, + 23, + 2, + 4, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 1325639643510008878, + 3345783597167709430, + null, + null, + 28, + 52, + 28, + 52, + 5, + 8, + true, + "advanced graph analytics", + "advanced graph analytics" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 13973298705492850553, + 16427562040426690599, + null, + null, + 75, + 91, + 75, + 91, + 12, + 15, + true, + "fast SpMV kernel", + "fast SpMV kernel" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 2924972194163802578, + 13928333483376329414, + null, + null, + 121, + 133, + 121, + 133, + 22, + 24, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 12779036928191531604, + 14066357852666934661, + null, + null, + 141, + 153, + 141, + 153, + 26, + 28, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "single-term", + 7162849562576593449, + "TEXT", + "#/texts/85", + 1.0, + 17729840004664227381, + 11831936904412939564, + null, + null, + 180, + 203, + 180, + 203, + 33, + 36, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "numval", + "fval", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 12178341415896435199, + 16109275631913765862, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "3.2", + "3.2" + ], + [ + "sentence", + "improper", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 18417459381709426233, + 10020576354403167023, + null, + null, + 4, + 54, + 4, + 54, + 3, + 9, + true, + "| Memory architecture and performance optimization", + "| Memory architecture and performance optimization" + ], + [ + "term", + "single-term", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 870113469708492800, + 18315576896615919675, + null, + null, + 6, + 25, + 6, + 25, + 4, + 6, + true, + "Memory architecture", + "Memory architecture" + ], + [ + "term", + "enum-term-mark-2", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 718073221538665455, + 25931046609376007, + null, + null, + 13, + 54, + 13, + 54, + 5, + 9, + true, + "architecture and performance optimization", + "architecture and performance optimization" + ], + [ + "term", + "single-term", + 15385417954505503552, + "TEXT", + "#/texts/86", + 1.0, + 6000441818249848958, + 4119448448060994558, + null, + null, + 30, + 54, + 30, + 54, + 7, + 9, + true, + "performance optimization", + "performance optimization" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10270848120294375051, + 14807652315789881244, + null, + null, + 0, + 115, + 0, + 115, + 0, + 16, + true, + "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages.", + "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages." + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 3120046212755594191, + 2344204684828230349, + null, + null, + 5, + 20, + 5, + 20, + 1, + 3, + true, + "adjacency lists", + "adjacency lists" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16579929503880818246, + 11016431613593929882, + null, + null, + 25, + 43, + 25, + 43, + 4, + 6, + true, + "adjacency matrices", + "adjacency matrices" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4206645798095581420, + 12345799280851160234, + null, + null, + 35, + 49, + 35, + 49, + 5, + 8, + true, + "matrices-based", + "matrices-based" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 209178867647501045, + 4306171030829307543, + null, + null, + 50, + 71, + 50, + 71, + 8, + 10, + true, + "graph implementations", + "graph implementations" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 11527770354380822892, + 1525925456019954586, + null, + null, + 77, + 96, + 77, + 96, + 11, + 13, + true, + "specific advantages", + "specific advantages" + ], + [ + "term", + "enum-term-mark-3", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 13909108517714618598, + 9720415877503432059, + null, + null, + 86, + 114, + 86, + 114, + 12, + 15, + true, + "advantages and disadvantages", + "advantages and disadvantages" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 13058502189641135024, + 7914974336984974505, + null, + null, + 101, + 114, + 101, + 114, + 14, + 15, + true, + "disadvantages", + "disadvantages" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16964777468013118327, + 12125593630312832079, + null, + null, + 116, + 281, + 116, + 281, + 16, + 46, + true, + "The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals.", + "The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals." + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8120096385401382748, + 7910554425673840453, + null, + null, + 120, + 141, + 120, + 141, + 17, + 20, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16287604968962194110, + 17739614045833498641, + null, + null, + 166, + 178, + 166, + 178, + 25, + 28, + true, + "node-centric", + "node-centric" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 389609625621164460, + 16796069565039840985, + null, + null, + 166, + 170, + 166, + 170, + 25, + 26, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 6221025016300311409, + 2885348311430487489, + null, + null, + 171, + 189, + 171, + 189, + 27, + 29, + true, + "centric operations", + "centric operations" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8248539907401009667, + 2265519894835621749, + null, + null, + 208, + 221, + 208, + 221, + 32, + 35, + true, + "data-locality", + "data-locality" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 389609625696431489, + 16797208720290908173, + null, + null, + 208, + 212, + 208, + 212, + 32, + 33, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14639575749166613576, + 13568917484906563413, + null, + null, + 213, + 221, + 213, + 221, + 34, + 35, + true, + "locality", + "locality" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4924931942798985710, + 12826410515891443201, + null, + null, + 226, + 248, + 226, + 248, + 36, + 39, + true, + "local graph operations", + "local graph operations" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 1107787356815021239, + 15363999495061554235, + null, + null, + 258, + 280, + 258, + 280, + 42, + 45, + true, + "first order traversals", + "first order traversals" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 15450473824681718350, + 11316135833913700142, + null, + null, + 282, + 416, + 282, + 416, + 46, + 69, + true, + "However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics.", + "However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics." + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 1457142279144648781, + 1447675611503317247, + null, + null, + 316, + 345, + 316, + 345, + 52, + 56, + true, + "global scale graph operations", + "global scale graph operations" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 7076268937724050913, + 4243200811936881452, + null, + null, + 370, + 382, + 370, + 382, + 61, + 63, + true, + "deep queries", + "deep queries" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 1325639643510008878, + 2772141157715748489, + null, + null, + 391, + 415, + 391, + 415, + 65, + 68, + true, + "advanced graph analytics", + "advanced graph analytics" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 7827972506356892881, + 14520623047921494047, + null, + null, + 417, + 559, + 417, + 559, + 69, + 97, + true, + "Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes.", + "Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes." + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940078062, + null, + null, + 452, + 468, + 452, + 468, + 76, + 79, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 329104159211247965, + 11348167646242253910, + null, + null, + 452, + 457, + 452, + 457, + 76, + 77, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8619280805974492668, + 15753998575971508119, + null, + null, + 458, + 468, + 458, + 468, + 78, + 79, + true, + "traversals", + "traversals" + ], + [ + "parenthesis", + "round brackets", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 12519231069705186881, + 3244334725177660806, + null, + null, + 488, + 501, + 488, + 501, + 82, + 87, + true, + "(or even all)", + "(or even all)" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 329104161758737773, + 17985157174507178569, + null, + null, + 502, + 507, + 502, + 507, + 87, + 88, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16381206557786164800, + 7873023782251793662, + null, + null, + 529, + 535, + 529, + 535, + 91, + 92, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 329104161758737773, + 17985157174507501037, + null, + null, + 553, + 558, + 553, + 558, + 95, + 96, + true, + "nodes", + "nodes" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4011742133887184242, + 11488487201968882536, + null, + null, + 560, + 674, + 560, + 674, + 97, + 118, + true, + "In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance.", + "In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance." + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8120096385401382748, + 7910554425673711036, + null, + null, + 566, + 587, + 566, + 587, + 99, + 102, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14217562351589216718, + 11354169541202862310, + null, + null, + 609, + 626, + 609, + 626, + 107, + 110, + true, + "many cache misses", + "many cache misses" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 6168355606348623882, + 17222187151520418992, + null, + null, + 634, + 643, + 634, + 643, + 111, + 112, + true, + "execution", + "execution" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16554996714335964809, + 1433800992424113617, + null, + null, + 658, + 673, + 658, + 673, + 115, + 117, + true, + "low performance", + "low performance" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 17312495112187822630, + 6156571435774831825, + null, + null, + 675, + 842, + 675, + 842, + 118, + 141, + true, + "Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution.", + "Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution." + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 17258313969216874928, + 9071727428053449720, + null, + null, + 702, + 714, + 702, + 714, + 121, + 123, + true, + "global graph", + "global graph" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940158702, + null, + null, + 709, + 725, + 709, + 725, + 122, + 125, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8619280805974492668, + 15753998575971490199, + null, + null, + 715, + 725, + 715, + 725, + 124, + 125, + true, + "traversals", + "traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8120096385401382748, + 7910554425673690012, + null, + null, + 733, + 754, + 733, + 754, + 127, + 130, + true, + "adjacency list format", + "adjacency list format" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 2315331045020242414, + 377342086051294718, + null, + null, + 782, + 808, + 782, + 808, + 133, + 136, + true, + "concurrent write conflicts", + "concurrent write conflicts" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8106478041490969672, + 12943647009928962637, + null, + null, + 817, + 824, + 817, + 824, + 137, + 138, + true, + "threads", + "threads" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 6168355606348623882, + 17222187151520399878, + null, + null, + 832, + 841, + 832, + 841, + 139, + 140, + true, + "execution", + "execution" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4883310113983762862, + 957944940518831119, + null, + null, + 843, + 910, + 843, + 910, + 141, + 153, + true, + "In the adjacency matrix format, these problems are not encountered.", + "In the adjacency matrix format, these problems are not encountered." + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 17729840004664227381, + 4716030849938756040, + null, + null, + 850, + 873, + 850, + 873, + 143, + 146, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14814125877433299736, + 17739915814128640735, + null, + null, + 881, + 889, + 881, + 889, + 148, + 149, + true, + "problems", + "problems" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 18351971490013416011, + 4499762574647525101, + null, + null, + 911, + 1033, + 911, + 1033, + 153, + 178, + true, + "The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV).", + "The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV)." + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 10308187620027892234, + 10064995642940178589, + null, + null, + 915, + 931, + 915, + 931, + 154, + 157, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 329104159211247965, + 11348167646242284666, + null, + null, + 915, + 920, + 915, + 920, + 154, + 155, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8619280805974492668, + 15753998575971475283, + null, + null, + 921, + 931, + 921, + 931, + 156, + 157, + true, + "traversals", + "traversals" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 389609625540553279, + 16811944284857720705, + null, + null, + 966, + 970, + 966, + 970, + 163, + 164, + true, + "SpMV", + "SpMV" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4747238687892740263, + 10797718497119362743, + null, + null, + 981, + 994, + 981, + 994, + 167, + 170, + true, + "sparse-matrix", + "sparse-matrix" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16381206579218541733, + 8525350541413229658, + null, + null, + 981, + 987, + 981, + 987, + 167, + 168, + true, + "sparse", + "sparse" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 8221073144122590356, + 9945113326549542538, + null, + null, + 988, + 1001, + 988, + 1001, + 169, + 171, + true, + "matrix sparse", + "matrix sparse" + ], + [ + "expression", + "wtoken-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 4747241205902184445, + 17195728821214094844, + null, + null, + 995, + 1008, + 995, + 1008, + 170, + 173, + true, + "sparse-vector", + "sparse-vector" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14706921468550924857, + 356797224531692029, + null, + null, + 1002, + 1023, + 1002, + 1023, + 172, + 174, + true, + "vector multiplication", + "vector multiplication" + ], + [ + "parenthesis", + "round brackets", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14654336952868801491, + 17810856186567143228, + null, + null, + 1024, + 1032, + 1024, + 1032, + 174, + 177, + true, + "(SpMSpV)", + "(SpMSpV)" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16381206514525102695, + 15744992214345462802, + null, + null, + 1025, + 1031, + 1025, + 1031, + 175, + 176, + true, + "SpMSpV", + "SpMSpV" + ], + [ + "sentence", + "proper", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 3718840373702004760, + 15287027377710879857, + null, + null, + 1034, + 1213, + 1034, + 1213, + 178, + 212, + true, + "It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list.", + "It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list." + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 389609625540553279, + 16811944284857728765, + null, + null, + 1087, + 1091, + 1087, + 1091, + 188, + 189, + true, + "SpMV", + "SpMV" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16579222348944913804, + 16370295638287544198, + null, + null, + 1109, + 1130, + 1109, + 1130, + 192, + 194, + true, + "multithreaded fashion", + "multithreaded fashion" + ], + [ + "expression", + "word-concatenation", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 14404307070545181542, + 7498659441809181561, + null, + null, + 1152, + 1164, + 1152, + 1164, + 199, + 202, + true, + "cache-misses", + "cache-misses" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 329104161511603847, + 17979715909973621736, + null, + null, + 1152, + 1157, + 1152, + 1157, + 199, + 200, + true, + "cache", + "cache" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 16381206594553345358, + 2772222321621565622, + null, + null, + 1158, + 1164, + 1158, + 1164, + 201, + 202, + true, + "misses", + "misses" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 11359951565081966931, + 2519317996116724135, + null, + null, + 1179, + 1193, + 1179, + 1193, + 205, + 207, + true, + "clever sorting", + "clever sorting" + ], + [ + "term", + "single-term", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 15699951717655812132, + 7744836953142101576, + null, + null, + 1201, + 1212, + 1201, + 1212, + 209, + 211, + true, + "tuples list", + "tuples list" + ], + [ + "numval", + "ival", + 10815650641518265876, + "TEXT", + "#/texts/87", + 1.0, + 15441160910541481861, + 93251422791520216, + null, + null, + 1214, + 1216, + 1214, + 1216, + 212, + 213, + true, + "17", + "17" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/88", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "sentence", + "proper", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 9900070277501077316, + 14879933636210090686, + null, + null, + 0, + 208, + 0, + 204, + 0, + 44, + true, + "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3.", + "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3." + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 15360629769874482523, + 14086244081592280733, + null, + null, + 18, + 28, + 18, + 28, + 3, + 4, + true, + "advantages", + "advantages" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17729840004664227381, + 2529224331117607819, + null, + null, + 36, + 59, + 36, + 59, + 6, + 9, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104161565915183, + 1151200886323461192, + null, + null, + 68, + 73, + 68, + 73, + 11, + 12, + true, + "needs", + "needs" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 6285955549867796622, + 3519954987974151695, + null, + null, + 87, + 103, + 87, + 103, + 16, + 21, + true, + "time-to-solution", + "time-to-solution" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 389609625631241985, + 7019734781741931663, + null, + null, + 87, + 91, + 87, + 91, + 16, + 17, + true, + "time", + "time" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 14635106751859230946, + 195027407191503697, + null, + null, + 95, + 103, + 95, + 103, + 20, + 21, + true, + "solution", + "solution" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104053346205471, + 10439298819923435512, + null, + null, + 104, + 109, + 104, + 109, + 21, + 24, + true, + "(TTS)", + "(TTS)" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813189025, + null, + null, + 105, + 108, + 105, + 108, + 22, + 23, + true, + "TTS", + "TTS" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106477782290185579, + 3942987813051468226, + null, + null, + 114, + 121, + 114, + 121, + 25, + 26, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104161571401725, + 1123588280959364869, + null, + null, + 138, + 143, + 138, + 143, + 28, + 29, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8619280805974492668, + 13535634626495482105, + null, + null, + 147, + 157, + 147, + 157, + 30, + 31, + true, + "traversals", + "traversals" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603210985, + null, + null, + 162, + 167, + 162, + 167, + 32, + 35, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896300292, + 10294526606162848173, + null, + null, + 162, + 165, + 162, + 165, + 32, + 33, + true, + "Neo", + "Neo" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235156, + 4257155890605923351, + null, + null, + 165, + 166, + 165, + 166, + 33, + 34, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 389609625541197668, + 10258688196052532561, + null, + null, + 166, + 174, + 166, + 170, + 34, + 36, + true, + "J \u2021\u2021", + "J \u2021\u2021" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2924972194163802578, + 1896936451152973716, + null, + null, + 183, + 195, + 179, + 191, + 38, + 40, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 16381206514091025767, + 877060830667997748, + null, + null, + 199, + 205, + 195, + 201, + 41, + 42, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235163, + 4257155890625182636, + null, + null, + 206, + 207, + 202, + 203, + 42, + 43, + true, + "3", + "3" + ], + [ + "sentence", + "proper", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 7523992103366410925, + 3988616322684329041, + null, + null, + 209, + 314, + 205, + 306, + 44, + 75, + true, + "We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges).", + "We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges)." + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104161594420373, + 1121791991465001484, + null, + null, + 223, + 228, + 219, + 224, + 47, + 50, + true, + "k-hop", + "k-hop" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 10277631301245525554, + 11328741594289786650, + null, + null, + 225, + 244, + 221, + 240, + 49, + 52, + true, + "hop traversal query", + "hop traversal query" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 14639714523137288664, + 14458905888448100979, + null, + null, + 252, + 260, + 248, + 256, + 54, + 56, + true, + "graph500", + "graph500" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104159211247965, + 8735376894233298341, + null, + null, + 252, + 257, + 248, + 253, + 54, + 55, + true, + "graph", + "graph" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896310600, + 10294532231444872390, + null, + null, + 257, + 260, + 253, + 256, + 55, + 56, + true, + "500", + "500" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 15441160910541474145, + 17130113706324829701, + null, + null, + 261, + 265, + 257, + 259, + 56, + 57, + true, + "\u00a7\u00a7", + "\u00a7\u00a7" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8690615943213957258, + 13520407369602191453, + null, + null, + 266, + 277, + 260, + 271, + 57, + 62, + true, + "(64M edges)", + "(64M edges)" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 15441160910541481167, + 17130124993064148148, + null, + null, + 267, + 269, + 261, + 263, + 58, + 59, + true, + "64", + "64" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106471963032718189, + 12463756649278513661, + null, + null, + 269, + 276, + 263, + 270, + 59, + 61, + true, + "M edges", + "M edges" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 5469579567331425221, + 2987931756247219616, + null, + null, + 282, + 295, + 276, + 289, + 63, + 66, + true, + "twitter-graph", + "twitter-graph" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 14639714523137180322, + 14458934040734756549, + null, + null, + 290, + 300, + 284, + 292, + 65, + 67, + true, + "graph \u00b6\u00b6", + "graph \u00b6\u00b6" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 14861489952347503301, + 15388803781782082196, + null, + null, + 301, + 313, + 293, + 305, + 67, + 74, + true, + "(1.5B edges)", + "(1.5B edges)" + ], + [ + "numval", + "fval", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896427344, + 10294451467892719516, + null, + null, + 302, + 305, + 294, + 297, + 68, + 71, + true, + "1.5", + "1.5" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106350691486682096, + 2513264736124284390, + null, + null, + 305, + 312, + 297, + 304, + 71, + 73, + true, + "B edges", + "B edges" + ], + [ + "sentence", + "proper", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 18326286580678309185, + 9777799881842905097, + null, + null, + 315, + 354, + 307, + 346, + 75, + 82, + true, + "Two important observations can be made.", + "Two important observations can be made." + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8005677520082126207, + 15295884295027505463, + null, + null, + 315, + 341, + 307, + 333, + 75, + 78, + true, + "Two important observations", + "Two important observations" + ], + [ + "sentence", + "proper", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 7520242923557925246, + 16073154361344584091, + null, + null, + 355, + 457, + 347, + 449, + 82, + 104, + true, + "Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals.", + "Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals." + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2924972194163802578, + 1896936451152637986, + null, + null, + 368, + 380, + 360, + 372, + 85, + 87, + true, + "graph engine", + "graph engine" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8384182147497629769, + 5245314349915949977, + null, + null, + 427, + 439, + 419, + 431, + 98, + 101, + true, + "higher-order", + "higher-order" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 13311772431027563309, + 3112208822715064100, + null, + null, + 434, + 456, + 426, + 448, + 100, + 103, + true, + "order graph traversals", + "order graph traversals" + ], + [ + "sentence", + "proper", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 4048875314896090291, + 7787099331214545680, + null, + null, + 458, + 533, + 450, + 525, + 104, + 123, + true, + "With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour.", + "With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour." + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603099426, + null, + null, + 463, + 468, + 455, + 460, + 105, + 108, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896300292, + 10294526606162752498, + null, + null, + 463, + 466, + 455, + 458, + 105, + 106, + true, + "Neo", + "Neo" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235156, + 4257155890605968002, + null, + null, + 466, + 467, + 458, + 459, + 106, + 107, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813163053, + null, + null, + 505, + 508, + 497, + 500, + 116, + 117, + true, + "TTS", + "TTS" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106478012012949344, + 6607109766774689060, + null, + null, + 515, + 522, + 507, + 514, + 118, + 119, + true, + "upwards", + "upwards" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235161, + 4257155890657921360, + null, + null, + 526, + 527, + 518, + 519, + 120, + 121, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 389609625695186535, + 7021071120195777033, + null, + null, + 528, + 532, + 520, + 524, + 121, + 122, + true, + "hour", + "hour" + ], + [ + "sentence", + "proper", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 7221032262585555497, + 11521686287281845163, + null, + null, + 534, + 644, + 526, + 636, + 123, + 146, + true, + "Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals.", + "Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals." + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 2924972194163802578, + 1896936451152650353, + null, + null, + 548, + 560, + 540, + 552, + 126, + 128, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 16295992254884720120, + 17528779414945550420, + null, + null, + 567, + 583, + 559, + 575, + 129, + 131, + true, + "minimal variance", + "minimal variance" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813288835, + null, + null, + 591, + 594, + 583, + 586, + 133, + 134, + true, + "TTS", + "TTS" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 389609625633007953, + 7019735724866522291, + null, + null, + 607, + 611, + 599, + 603, + 136, + 137, + true, + "runs", + "runs" + ], + [ + "expression", + "word-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106398446669642199, + 13297225597520616176, + null, + null, + 619, + 626, + 611, + 618, + 139, + 142, + true, + "k-order", + "k-order" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 10650750531158196500, + 5921957274745700074, + null, + null, + 621, + 632, + 613, + 624, + 141, + 143, + true, + "order graph", + "order graph" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 10308187620027892234, + 15392908800969629769, + null, + null, + 627, + 643, + 619, + 635, + 142, + 145, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8619280805974492668, + 13535634626495580294, + null, + null, + 633, + 643, + 625, + 635, + 144, + 145, + true, + "traversals", + "traversals" + ], + [ + "sentence", + "proper", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8536372207058713595, + 9010029933076937587, + null, + null, + 645, + 745, + 637, + 737, + 146, + 171, + true, + "This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", + "This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from." + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 4914830112961611503, + 2589359867907671877, + null, + null, + 656, + 670, + 648, + 662, + 149, + 151, + true, + "stark contrast", + "stark contrast" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 329104162105779366, + 8851381515603112553, + null, + null, + 674, + 679, + 666, + 671, + 152, + 155, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896300292, + 10294526606162815277, + null, + null, + 674, + 677, + 666, + 669, + 152, + 153, + true, + "Neo", + "Neo" + ], + [ + "numval", + "ival", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 17767354399704235156, + 4257155890605956247, + null, + null, + 677, + 678, + 669, + 670, + 153, + 154, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415895656509, + 10294424381813291998, + null, + null, + 691, + 694, + 683, + 686, + 158, + 159, + true, + "TTS", + "TTS" + ], + [ + "expression", + "wtoken-concatenation", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 8106352617177552874, + 13208133057108101651, + null, + null, + 721, + 728, + 713, + 720, + 163, + 167, + true, + "node(s)", + "node(s)" + ], + [ + "term", + "single-term", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 389609625621164460, + 6991227661982178009, + null, + null, + 721, + 725, + 713, + 717, + 163, + 164, + true, + "node", + "node" + ], + [ + "parenthesis", + "round brackets", + 12004249365408683930, + "TEXT", + "#/texts/89", + 1.0, + 12178341415896391104, + 10294450863445651194, + null, + null, + 725, + 728, + 717, + 720, + 164, + 167, + true, + "(s)", + "(s)" + ], + [ + "sentence", + "proper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 11108464234987066551, + 6109858459339794472, + null, + null, + 0, + 141, + 0, + 141, + 0, + 25, + true, + "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO.", + "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10068551836820132, + 11381505126689469627, + null, + null, + 8, + 21, + 8, + 21, + 1, + 3, + true, + "big advantage", + "big advantage" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17729840004664227381, + 5880070554995887882, + null, + null, + 35, + 58, + 35, + 58, + 6, + 9, + true, + "adjacency matrix format", + "adjacency matrix format" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14731630785121984489, + 7830997585744793399, + null, + null, + 82, + 110, + 82, + 110, + 14, + 17, + true, + "advanced compression methods", + "advanced compression methods" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481862, + 6611832599487460343, + null, + null, + 111, + 113, + 111, + 113, + 17, + 18, + true, + "18", + "18" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896222617, + 14422417698616585242, + null, + null, + 122, + 125, + 122, + 125, + 20, + 21, + true, + "CSR", + "CSR" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896222361, + 14422417712036675521, + null, + null, + 137, + 140, + 137, + 140, + 23, + 24, + true, + "COO", + "COO" + ], + [ + "sentence", + "proper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10246563988291592306, + 2276066290660368261, + null, + null, + 142, + 260, + 142, + 260, + 25, + 46, + true, + "This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory.", + "This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 13543987209185531185, + 9337570148046077703, + null, + null, + 173, + 189, + 173, + 189, + 29, + 31, + true, + "memory footprint", + "memory footprint" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159211247965, + 10155196688984100956, + null, + null, + 197, + 202, + 197, + 202, + 33, + 34, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206539879417749, + 17210280397844822730, + null, + null, + 221, + 227, + 221, + 227, + 37, + 38, + true, + "graphs", + "graphs" + ], + [ + "expression", + "word-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 6187817560337829240, + 141181149316580822, + null, + null, + 250, + 259, + 250, + 259, + 42, + 45, + true, + "in-memory", + "in-memory" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 8106471889009365966, + 5663177725813371962, + null, + null, + 252, + 259, + 252, + 259, + 43, + 45, + true, + "-memory", + "-memory" + ], + [ + "sentence", + "proper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3415607433018794013, + 4687853286732689567, + null, + null, + 261, + 390, + 261, + 390, + 46, + 75, + true, + "In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO.", + "In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625695123443, + 16925837287041287106, + null, + null, + 268, + 272, + 268, + 272, + 48, + 49, + true, + "case", + "case" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104162186494203, + 6595611082205689321, + null, + null, + 305, + 310, + 305, + 310, + 56, + 57, + true, + "edges", + "edges" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15115837464859551979, + 355789543703900736, + null, + null, + 314, + 330, + 314, + 330, + 58, + 60, + true, + "blocked matrices", + "blocked matrices" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 5385534283636121811, + 11562346121021298365, + null, + null, + 336, + 346, + 336, + 346, + 62, + 64, + true, + "fixed size", + "fixed size" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17096888249457072514, + 8451717139878198506, + null, + null, + 362, + 374, + 362, + 374, + 68, + 70, + true, + "block matrix", + "block matrix" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14635113557370658205, + 5397031793806654273, + null, + null, + 381, + 389, + 381, + 389, + 72, + 74, + true, + "type COO", + "type COO" + ], + [ + "sentence", + "proper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10474976233539682374, + 18026011700976907235, + null, + null, + 391, + 536, + 391, + 536, + 75, + 107, + true, + "We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers.", + "We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625741058932, + 16925015935541254706, + null, + null, + 404, + 408, + 404, + 408, + 78, + 79, + true, + "size", + "size" + ], + [ + "expression", + "word-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17096868097919627199, + 13945693794675913524, + null, + null, + 416, + 428, + 416, + 428, + 81, + 84, + true, + "block-matrix", + "block-matrix" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159220842206, + 10310778067446940500, + null, + null, + 416, + 421, + 416, + 421, + 81, + 82, + true, + "block", + "block" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206594266103973, + 13951928781339872173, + null, + null, + 422, + 428, + 422, + 428, + 83, + 84, + true, + "matrix", + "matrix" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235162, + 16086706123952683919, + null, + null, + 435, + 436, + 435, + 436, + 86, + 87, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481860, + 6611832599456912896, + null, + null, + 437, + 439, + 437, + 439, + 87, + 88, + true, + "16", + "16" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481166, + 6611754875794384515, + null, + null, + 442, + 444, + 442, + 444, + 89, + 90, + true, + "65", + "65" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896310785, + 14422419606559923738, + null, + null, + 445, + 448, + 445, + 448, + 90, + 91, + true, + "536", + "536" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625632301288, + 16928603534806395316, + null, + null, + 461, + 465, + 461, + 465, + 94, + 95, + true, + "pair", + "pair" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 8106398345633211267, + 6358279098714856308, + null, + null, + 469, + 476, + 469, + 476, + 96, + 97, + true, + "indices", + "indices" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3115430403987697525, + 12871961618510872276, + null, + null, + 512, + 535, + 512, + 535, + 103, + 106, + true, + "unsigned short integers", + "unsigned short integers" + ], + [ + "sentence", + "proper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3685477724550052401, + 9873589769454795234, + null, + null, + 537, + 684, + 537, + 684, + 107, + 140, + true, + "Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes.", + "Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625699630670, + 16925667780777279203, + null, + null, + 554, + 558, + 554, + 558, + 110, + 111, + true, + "edge", + "edge" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 13543987209185531185, + 9337570148045967789, + null, + null, + 565, + 581, + 565, + 581, + 113, + 115, + true, + "memory footprint", + "memory footprint" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235156, + 16086706123708070212, + null, + null, + 590, + 591, + 590, + 591, + 117, + 118, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159327206248, + 10350794591348378566, + null, + null, + 592, + 597, + 592, + 597, + 118, + 119, + true, + "bytes", + "bytes" + ], + [ + "parenthesis", + "round brackets", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 6137469846456037455, + 5202143414337932883, + null, + null, + 598, + 637, + 598, + 637, + 119, + 129, + true, + "(equivalent to a single 32-bit integer)", + "(equivalent to a single 32-bit integer)" + ], + [ + "expression", + "wtoken-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16380810014374034475, + 7208847194404732485, + null, + null, + 622, + 628, + 622, + 628, + 124, + 127, + true, + "32-bit", + "32-bit" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481849, + 6611832587831823848, + null, + null, + 622, + 624, + 622, + 624, + 124, + 125, + true, + "32", + "32" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16690448639448413645, + 3512112847705802723, + null, + null, + 625, + 636, + 625, + 636, + 126, + 128, + true, + "bit integer", + "bit integer" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 2663450017388020648, + 12004026720926102257, + null, + null, + 647, + 660, + 647, + 660, + 132, + 134, + true, + "weighted edge", + "weighted edge" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 6187534604692512350, + 9764773695057964576, + null, + null, + 663, + 672, + 663, + 672, + 135, + 136, + true, + "footprint", + "footprint" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235152, + 16086706131746349816, + null, + null, + 676, + 677, + 676, + 677, + 137, + 138, + true, + "8", + "8" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159327206248, + 10350794591348386969, + null, + null, + 678, + 683, + 678, + 683, + 138, + 139, + true, + "bytes", + "bytes" + ], + [ + "sentence", + "improper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896407674, + 14422417746503790758, + null, + null, + 685, + 688, + 685, + 688, + 140, + 141, + true, + "***", + "***" + ], + [ + "sentence", + "proper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 9302359278229801872, + 12812648110667786289, + null, + null, + 689, + 832, + 689, + 826, + 141, + 172, + true, + "This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$).", + "This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$)." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 10871439885151345979, + 15151057053119522021, + null, + null, + 699, + 720, + 699, + 720, + 144, + 146, + true, + "significant reduction", + "significant reduction" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 13543987209185531185, + 9337570148045966672, + null, + null, + 724, + 740, + 724, + 740, + 147, + 149, + true, + "memory footprint", + "memory footprint" + ], + [ + "expression", + "wtoken-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104162105779366, + 6648037853909519105, + null, + null, + 753, + 758, + 753, + 758, + 151, + 154, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341415896300292, + 14422419463042233057, + null, + null, + 753, + 756, + 753, + 756, + 151, + 152, + true, + "Neo", + "Neo" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235156, + 16086706123708539554, + null, + null, + 756, + 757, + 756, + 757, + 152, + 153, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 1441778349508010565, + 15297747033987145820, + null, + null, + 757, + 774, + 757, + 774, + 153, + 156, + true, + "J graph databases", + "J graph databases" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481854, + 6611832586181155412, + null, + null, + 786, + 788, + 786, + 788, + 159, + 160, + true, + "33", + "33" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 329104159327206248, + 10350794591348326495, + null, + null, + 789, + 794, + 789, + 794, + 160, + 161, + true, + "bytes", + "bytes" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 1126026715282292268, + 3148784051061061026, + null, + null, + 799, + 815, + 799, + 815, + 162, + 164, + true, + "unweighted edges", + "unweighted edges" + ], + [ + "expression", + "latex", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206549576689335, + 15188561199320662041, + null, + null, + 816, + 830, + 816, + 824, + 164, + 170, + true, + "^{\u2020\u2020\u2020}", + "$^{\u2020\u2020\u2020}$" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 12178341417198250328, + 14888204717589224560, + null, + null, + 819, + 828, + 819, + 822, + 167, + 168, + true, + "\u2020\u2020\u2020", + "\u2020\u2020\u2020" + ], + [ + "sentence", + "proper", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 8925645990612399548, + 16713121652458311820, + null, + null, + 833, + 1027, + 827, + 1021, + 172, + 216, + true, + "Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", + "Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory." + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206539879417749, + 17210280397844777021, + null, + null, + 859, + 865, + 853, + 859, + 177, + 178, + true, + "graphs", + "graphs" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235152, + 16086706131746369369, + null, + null, + 878, + 879, + 872, + 873, + 181, + 182, + true, + "8", + "8" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14857172535504849859, + 5773864008533755225, + null, + null, + 880, + 893, + 874, + 887, + 182, + 184, + true, + "billion edges", + "billion edges" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 14387950977550393964, + 8007787524976843735, + null, + null, + 899, + 914, + 893, + 908, + 186, + 188, + true, + "virtual machine", + "virtual machine" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541481849, + 6611832587831816065, + null, + null, + 920, + 922, + 914, + 916, + 189, + 190, + true, + "32", + "32" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541479948, + 6611754888016241485, + null, + null, + 923, + 925, + 917, + 919, + 190, + 191, + true, + "GB", + "GB" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 3124577709661373204, + 18272255581661630916, + null, + null, + 929, + 940, + 923, + 934, + 192, + 194, + true, + "free memory", + "free memory" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 2187881130485149075, + 6718806557176407140, + null, + null, + 964, + 978, + 958, + 972, + 200, + 202, + true, + "trillion edges", + "trillion edges" + ], + [ + "expression", + "word-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 7166534900846969312, + 708063167962396611, + null, + null, + 984, + 994, + 978, + 988, + 204, + 207, + true, + "bare-metal", + "bare-metal" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625686382362, + 16928856591750626339, + null, + null, + 984, + 988, + 978, + 982, + 204, + 205, + true, + "bare", + "bare" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 11697354855472611330, + 11387019788380482650, + null, + null, + 989, + 1000, + 983, + 994, + 206, + 208, + true, + "metal POWER", + "metal POWER" + ], + [ + "expression", + "wtoken-concatenation", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206511586975208, + 18226864088168425059, + null, + null, + 995, + 1001, + 989, + 995, + 207, + 209, + true, + "POWER9", + "POWER9" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235153, + 16086706131730015547, + null, + null, + 1000, + 1001, + 994, + 995, + 208, + 209, + true, + "9", + "9" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 389609625621164460, + 16920933654159999737, + null, + null, + 1002, + 1006, + 996, + 1000, + 209, + 210, + true, + "node", + "node" + ], + [ + "numval", + "ival", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 17767354399704235156, + 16086706123708163426, + null, + null, + 1012, + 1013, + 1006, + 1007, + 211, + 212, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 15441160910541487906, + 6611830889990664661, + null, + null, + 1014, + 1016, + 1008, + 1010, + 212, + 213, + true, + "TB", + "TB" + ], + [ + "term", + "single-term", + 7223381657047466215, + "TEXT", + "#/texts/90", + 1.0, + 16381206567042997791, + 4033589895737852410, + null, + null, + 1020, + 1026, + 1014, + 1020, + 214, + 215, + true, + "memory", + "memory" + ], + [ + "numval", + "fval", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 12178341415896435196, + 16211286906118314940, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "3.3", + "3.3" + ], + [ + "sentence", + "improper", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 14192878977779458197, + 10144190012485496573, + null, + null, + 4, + 48, + 4, + 48, + 3, + 10, + true, + "| Formulation and evaluation of deep queries", + "| Formulation and evaluation of deep queries" + ], + [ + "term", + "enum-term-mark-2", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 12865302163893152094, + 15940595769957357319, + null, + null, + 6, + 32, + 6, + 32, + 4, + 7, + true, + "Formulation and evaluation", + "Formulation and evaluation" + ], + [ + "term", + "single-term", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 2044684058342850165, + 14277022715477019349, + null, + null, + 6, + 17, + 6, + 17, + 4, + 5, + true, + "Formulation", + "Formulation" + ], + [ + "term", + "single-term", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 5456363662501675139, + 12901679329998763956, + null, + null, + 22, + 32, + 22, + 32, + 6, + 7, + true, + "evaluation", + "evaluation" + ], + [ + "term", + "single-term", + 15132906055887224772, + "TEXT", + "#/texts/91", + 1.0, + 7076268937724050913, + 7970870454235277029, + null, + null, + 36, + 48, + 36, + 48, + 8, + 10, + true, + "deep queries", + "deep queries" + ], + [ + "sentence", + "proper", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 11091581991954269716, + 10456364627355927036, + null, + null, + 0, + 57, + 0, + 57, + 0, + 12, + true, + "The goal of querying a KG is to answer complex questions.", + "The goal of querying a KG is to answer complex questions." + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 389609625699055241, + 9332893958662962709, + null, + null, + 4, + 8, + 4, + 8, + 1, + 2, + true, + "goal", + "goal" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 15441160910541480204, + 16382477296675596695, + null, + null, + 23, + 25, + 23, + 25, + 5, + 6, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 14314461436358843828, + 14492210953206209285, + null, + null, + 39, + 56, + 39, + 56, + 9, + 11, + true, + "complex questions", + "complex questions" + ], + [ + "sentence", + "proper", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 14577311106096638975, + 10656479587756904158, + null, + null, + 58, + 179, + 58, + 179, + 12, + 35, + true, + "As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", + "As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them." + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 329104159157820437, + 17616820081691235592, + null, + null, + 67, + 72, + 67, + 72, + 15, + 16, + true, + "users", + "users" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 4083292969395203883, + 5654192674738865140, + null, + null, + 100, + 113, + 100, + 113, + 22, + 23, + true, + "functionality", + "functionality" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 3916373036270397758, + 5882827815055053772, + null, + null, + 127, + 142, + 127, + 142, + 25, + 27, + true, + "complex queries", + "complex queries" + ], + [ + "term", + "single-term", + 17129434987283608290, + "TEXT", + "#/texts/92", + 1.0, + 15441160910541480204, + 16382477296675604919, + null, + null, + 150, + 152, + 150, + 152, + 29, + 30, + true, + "KG", + "KG" + ], + [ + "sentence", + "proper", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 9175725069191114564, + 6016235278680059217, + null, + null, + 0, + 168, + 0, + 168, + 0, + 33, + true, + "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow.", + "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow." + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161571401725, + 9611792532481010924, + null, + null, + 3, + 8, + 3, + 8, + 1, + 2, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 9548493583803247969, + 10298167230238939895, + null, + null, + 29, + 51, + 29, + 51, + 6, + 9, + true, + "complex query language", + "complex query language" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104159157820437, + 13570310750889527762, + null, + null, + 57, + 62, + 57, + 62, + 10, + 11, + true, + "users", + "users" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415895525628, + 8266666853824012019, + null, + null, + 82, + 85, + 82, + 85, + 16, + 17, + true, + "way", + "way" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 15274395271512051612, + 7615673165598889472, + null, + null, + 96, + 117, + 96, + 117, + 19, + 22, + true, + "complex graph queries", + "complex graph queries" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 15791836353173876541, + 15444549276320690836, + null, + null, + 123, + 141, + 123, + 141, + 24, + 26, + true, + "declarative format", + "declarative format" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 14638857990842534974, + 605379263320532680, + null, + null, + 159, + 167, + 159, + 167, + 31, + 32, + true, + "workflow", + "workflow" + ], + [ + "sentence", + "proper", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 57107468990616569, + 6772300740708680643, + null, + null, + 169, + 254, + 169, + 254, + 33, + 48, + true, + "Workflows are represented as a DAG of operations and are conceptually related to DFs.", + "Workflows are represented as a DAG of operations and are conceptually related to DFs." + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 6183387189920121296, + 1410025149081126995, + null, + null, + 169, + 178, + 169, + 178, + 33, + 34, + true, + "Workflows", + "Workflows" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415896112046, + 8266776682312646277, + null, + null, + 200, + 203, + 200, + 203, + 38, + 39, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 13985988710970420061, + 12003721642956804645, + null, + null, + 207, + 217, + 207, + 217, + 40, + 41, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415896110548, + 8266776675949707180, + null, + null, + 250, + 253, + 250, + 253, + 46, + 47, + true, + "DFs", + "DFs" + ], + [ + "sentence", + "proper", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 2103206676431633354, + 7765713919478438730, + null, + null, + 255, + 445, + 255, + 445, + 48, + 84, + true, + "Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set.", + "Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set." + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161758737773, + 9650253826023077819, + null, + null, + 278, + 283, + 278, + 283, + 53, + 54, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 2221511436793850179, + 15451583893193860762, + null, + null, + 287, + 300, + 287, + 300, + 55, + 57, + true, + "workflow DAGs", + "workflow DAGs" + ], + [ + "expression", + "word-concatenation", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 18047739014778172965, + 15647829115234352063, + null, + null, + 318, + 337, + 318, + 337, + 60, + 63, + true, + "data-transformation", + "data-transformation" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 389609625696431489, + 11798311443523374643, + null, + null, + 318, + 322, + 318, + 322, + 60, + 61, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 17564764652872774216, + 17063213487217751853, + null, + null, + 323, + 343, + 323, + 343, + 62, + 64, + true, + "transformation tasks", + "transformation tasks" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 16077761921532073702, + 15751654443455067910, + null, + null, + 349, + 374, + 349, + 374, + 66, + 69, + true, + "specific graph operations", + "specific graph operations" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161828910287, + 9583702698217235724, + null, + null, + 391, + 396, + 391, + 396, + 72, + 73, + true, + "input", + "input" + ], + [ + "parenthesis", + "round brackets", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 4361897097975010664, + 13376241768661335660, + null, + null, + 397, + 414, + 397, + 414, + 73, + 77, + true, + "(or intermediate)", + "(or intermediate)" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415895638602, + 8266662293356183469, + null, + null, + 415, + 418, + 415, + 418, + 77, + 78, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 329104161758737773, + 9650253826023172647, + null, + null, + 422, + 427, + 422, + 427, + 79, + 80, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 12178341415895638602, + 8266662293356182036, + null, + null, + 441, + 444, + 441, + 444, + 82, + 83, + true, + "set", + "set" + ], + [ + "sentence", + "proper", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 14814020289477828640, + 7062038975480317251, + null, + null, + 446, + 481, + 446, + 481, + 84, + 90, + true, + "We call these operations worktasks.", + "We call these operations worktasks." + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 3406005564439493096, + 962936403426020052, + null, + null, + 460, + 480, + 460, + 480, + 87, + 89, + true, + "operations worktasks", + "operations worktasks" + ], + [ + "sentence", + "proper", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 17133158107037793945, + 13159924364498959182, + null, + null, + 482, + 643, + 482, + 643, + 90, + 121, + true, + "For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", + "For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4)." + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 9340106368352020484, + 9901483225457527231, + null, + null, + 486, + 505, + 486, + 505, + 91, + 93, + true, + "further convenience", + "further convenience" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 7582281372004134232, + 1510365298380696859, + null, + null, + 527, + 551, + 527, + 551, + 98, + 101, + true, + "graphical user interface", + "graphical user interface" + ], + [ + "parenthesis", + "round brackets", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 389609625545054248, + 12214200532744887129, + null, + null, + 552, + 556, + 552, + 556, + 101, + 104, + true, + "(UI)", + "(UI)" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 15441160910541484266, + 13744330050619277317, + null, + null, + 553, + 555, + 553, + 555, + 102, + 103, + true, + "UI", + "UI" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 5681538719543297340, + 7633237073525727434, + null, + null, + 580, + 594, + 580, + 594, + 108, + 110, + true, + "such workflows", + "such workflows" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 17267900621492324657, + 3642376820636860698, + null, + null, + 600, + 627, + 600, + 627, + 112, + 115, + true, + "visual programming approach", + "visual programming approach" + ], + [ + "parenthesis", + "round brackets", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 7105842701545013905, + 2265574179061884287, + null, + null, + 628, + 642, + 628, + 642, + 115, + 120, + true, + "(see Figure 4)", + "(see Figure 4)" + ], + [ + "term", + "single-term", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 16381206514091025767, + 10238121304656839602, + null, + null, + 633, + 639, + 633, + 639, + 117, + 118, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 10350406469077463155, + "TEXT", + "#/texts/93", + 1.0, + 17767354399704235156, + 18395627803235450761, + null, + null, + 640, + 641, + 640, + 641, + 118, + 119, + true, + "4", + "4" + ], + [ + "sentence", + "proper", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 1859492819924485121, + 10838117205519727135, + null, + null, + 0, + 128, + 0, + 128, + 0, + 22, + true, + "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions.", + "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions." + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 17889054130498802051, + 13611413549729115921, + null, + null, + 27, + 44, + 27, + 44, + 5, + 7, + true, + "fundamental types", + "fundamental types" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 3534171294115941544, + 8731026536612016164, + null, + null, + 48, + 57, + 48, + 57, + 8, + 9, + true, + "worktasks", + "worktasks" + ], + [ + "expression", + "word-concatenation", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 15221896740599576202, + 7666904121768591309, + null, + null, + 59, + 73, + 59, + 73, + 10, + 13, + true, + "node-retrieval", + "node-retrieval" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 389609625621164460, + 11209728009221918698, + null, + null, + 59, + 63, + 59, + 63, + 10, + 11, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 6168254675298346879, + 6240343808982680566, + null, + null, + 64, + 73, + 64, + 73, + 12, + 13, + true, + "retrieval", + "retrieval" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 3503811091434006699, + 4368860458480451668, + null, + null, + 75, + 84, + 75, + 84, + 14, + 15, + true, + "traversal", + "traversal" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 16654294478124171317, + 10151652501900860692, + null, + null, + 86, + 103, + 86, + 103, + 16, + 18, + true, + "logical operators", + "logical operators" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 11555096374369856312, + 7157942907653228754, + null, + null, + 108, + 127, + 108, + 127, + 19, + 21, + true, + "transform functions", + "transform functions" + ], + [ + "sentence", + "proper", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 4963035477772371835, + 4020325737246968829, + null, + null, + 129, + 262, + 129, + 262, + 22, + 46, + true, + "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", + "In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design." + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 17030057430150962643, + 11687865223449973507, + null, + null, + 136, + 154, + 136, + 154, + 24, + 26, + true, + "following sections", + "following sections" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 16381206568246674273, + 3558057784302965696, + null, + null, + 175, + 181, + 175, + 181, + 31, + 32, + true, + "detail", + "detail" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 3534171294115941544, + 8731026536612028033, + null, + null, + 190, + 199, + 190, + 199, + 34, + 35, + true, + "worktasks", + "worktasks" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 8106398484416909789, + 4307351017350543686, + null, + null, + 223, + 230, + 223, + 230, + 39, + 40, + true, + "context", + "context" + ], + [ + "term", + "single-term", + 16949854269270315165, + "TEXT", + "#/texts/94", + 1.0, + 17730388821334829224, + 6503636413294871875, + null, + null, + 238, + 261, + 238, + 261, + 42, + 45, + true, + "adjacency matrix design", + "adjacency matrix design" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/95", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 4361549266593946746, + "TEXT", + "#/texts/96", + 1.0, + 17767354399704235153, + 1792635071361844496, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "9", + "9" + ], + [ + "sentence", + "improper", + 4361549266593946746, + "TEXT", + "#/texts/96", + 1.0, + 15441160910541485670, + 7911155824351265465, + null, + null, + 1, + 3, + 1, + 3, + 1, + 2, + true, + "of", + "of" + ], + [ + "numval", + "ival", + 4361549266593946746, + "TEXT", + "#/texts/96", + 1.0, + 15441160910541481979, + 7911155768595088752, + null, + null, + 3, + 5, + 3, + 5, + 2, + 3, + true, + "15", + "15" + ], + [ + "sentence", + "improper", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 6349660887815587103, + 9627223604255737762, + null, + null, + 0, + 22, + 0, + 22, + 0, + 8, + true, + "3.3.1 | Node retrieval", + "3.3.1 | Node retrieval" + ], + [ + "expression", + "wtoken-concatenation", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 329104147725285867, + 13023020285713349824, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "3.3.1", + "3.3.1" + ], + [ + "numval", + "fval", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 12178341415896435196, + 198388536621247129, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 9802652237802670052, + "TEXT", + "#/texts/97", + 1.0, + 17767354399704235161, + 3052200858272860943, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "1", + "1" + ], + [ + "sentence", + "proper", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 13639548757740861010, + 11696805249441926913, + null, + null, + 0, + 69, + 0, + 69, + 0, + 13, + true, + "This task finds a set of nodes which satisfy certain search criteria.", + "This task finds a set of nodes which satisfy certain search criteria." + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625631210899, + 1695322703373668221, + null, + null, + 5, + 9, + 5, + 9, + 1, + 2, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 12178341415895638602, + 16401925845918103767, + null, + null, + 18, + 21, + 18, + 21, + 4, + 5, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 329104161758737773, + 9063467011231067037, + null, + null, + 25, + 30, + 25, + 30, + 6, + 7, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 1139782918783911343, + 10980002430644435601, + null, + null, + 45, + 68, + 45, + 68, + 9, + 12, + true, + "certain search criteria", + "certain search criteria" + ], + [ + "sentence", + "proper", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 9504985242355517435, + 18023630049865929203, + null, + null, + 70, + 216, + 70, + 216, + 13, + 41, + true, + "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property.", + "This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property." + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 1353284443403185756, + 13247714493573934499, + null, + null, + 100, + 111, + 100, + 111, + 19, + 21, + true, + "single node", + "single node" + ], + [ + "parenthesis", + "round brackets", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 6343195480109663451, + 11165462414382695465, + null, + null, + 119, + 132, + 119, + 132, + 23, + 26, + true, + "(approximate)", + "(approximate)" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625621548280, + 1694766356608744958, + null, + null, + 133, + 137, + 133, + 137, + 26, + 27, + true, + "name", + "name" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 6764280510749928008, + 2538978002994667418, + null, + null, + 141, + 162, + 141, + 162, + 28, + 31, + true, + "exact node identifier", + "exact node identifier" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 329104161758737773, + 9063467011231090358, + null, + null, + 175, + 180, + 175, + 180, + 34, + 35, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 6423270415561497308, + 8377404395557394670, + null, + null, + 196, + 215, + 196, + 215, + 38, + 40, + true, + "particular property", + "particular property" + ], + [ + "sentence", + "improper", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 11273853394820260322, + 5543894756229177106, + null, + null, + 217, + 270, + 217, + 270, + 41, + 57, + true, + "The task constructs a node vector v $^{!}$, such that", + "The task constructs a node vector v $^{!}$, such that" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625631210899, + 1695322703373656343, + null, + null, + 221, + 225, + 221, + 225, + 42, + 43, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 7596240835069815859, + 15004378462042502165, + null, + null, + 239, + 250, + 239, + 250, + 45, + 47, + true, + "node vector", + "node vector" + ], + [ + "expression", + "latex", + 5524728206729419689, + "TEXT", + "#/texts/98", + 1.0, + 389609625699793568, + 1705012483593147870, + null, + null, + 253, + 259, + 253, + 259, + 48, + 54, + true, + "^{!}", + "$^{!}$" + ], + [ + "sentence", + "improper", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 588808569772103507, + 3158630085314057550, + null, + null, + 0, + 71, + 0, + 69, + 0, + 36, + true, + "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", + "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de" + ], + [ + "expression", + "wtoken-concatenation", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 5948620232447446819, + 3619933651552123134, + null, + null, + 2, + 15, + 2, + 15, + 1, + 14, + true, + "^{!}_{i}=", + "$^{!}$$_{i}$=" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235161, + 3863023118325513235, + null, + null, + 16, + 17, + 16, + 17, + 14, + 15, + true, + "1", + "1" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235160, + 3863023118293440507, + null, + null, + 33, + 34, + 33, + 34, + 20, + 21, + true, + "0", + "0" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235162, + 3863023118274566919, + null, + null, + 47, + 48, + 47, + 48, + 25, + 26, + true, + "2", + "2" + ], + [ + "expression", + "wtoken-concatenation", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 7116489890516680880, + 11145030960935339860, + null, + null, + 53, + 63, + 53, + 63, + 28, + 33, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 15441160910541481788, + 1525860005576289474, + null, + null, + 60, + 62, + 60, + 62, + 31, + 32, + true, + "26", + "26" + ], + [ + "numval", + "ival", + 4043385013945968936, + "TEXT", + "#/texts/99", + 1.0, + 17767354399704235163, + 3863023118291550190, + null, + null, + 67, + 68, + 66, + 67, + 34, + 35, + true, + "3", + "3" + ], + [ + "sentence", + "improper", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 329104161580313375, + 9731581819344976201, + null, + null, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "where", + "where" + ], + [ + "sentence", + "proper", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 11753315931385641908, + 2734980420462844181, + null, + null, + 6, + 69, + 6, + 69, + 1, + 13, + true, + "S represents the set of nodes that satisfy the search criteria.", + "S represents the set of nodes that satisfy the search criteria." + ], + [ + "term", + "single-term", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 12178341415895638602, + 1959738706672078328, + null, + null, + 23, + 26, + 23, + 26, + 4, + 5, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 329104161758737773, + 9790437187668217640, + null, + null, + 30, + 35, + 30, + 35, + 6, + 7, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 11778884428660217326, + "TEXT", + "#/texts/100", + 1.0, + 6565208683621509436, + 15059693667290050564, + null, + null, + 53, + 68, + 53, + 68, + 10, + 12, + true, + "search criteria", + "search criteria" + ], + [ + "sentence", + "improper", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 10555101842315227314, + 3578570888443863693, + null, + null, + 0, + 23, + 0, + 23, + 0, + 8, + true, + "3.3.2 | Graph traversal", + "3.3.2 | Graph traversal" + ], + [ + "expression", + "wtoken-concatenation", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 329104147725285866, + 5872895868719124566, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "3.3.2", + "3.3.2" + ], + [ + "numval", + "fval", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 12178341415896435196, + 17738549797942293450, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 17767354399704235162, + 16045717610508207921, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 12875050310340408203, + "TEXT", + "#/texts/101", + 1.0, + 14871935126973563211, + 10403115224383595903, + null, + null, + 6, + 23, + 6, + 23, + 5, + 8, + true, + "| Graph traversal", + "| Graph traversal" + ], + [ + "sentence", + "proper", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 6704314193472549131, + 12203121571784219650, + null, + null, + 0, + 67, + 0, + 67, + 0, + 14, + true, + "The simplest type of graph-traversal is the direct graph-traversal.", + "The simplest type of graph-traversal is the direct graph-traversal." + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 11151330421356998770, + 885013344391439016, + null, + null, + 4, + 17, + 4, + 17, + 1, + 3, + true, + "simplest type", + "simplest type" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8759553427650775934, + 8729620688739724694, + null, + null, + 21, + 36, + 21, + 36, + 4, + 7, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104159211247965, + 3851831851148408183, + null, + null, + 21, + 26, + 21, + 26, + 4, + 5, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 3503811091434006699, + 9904713603809862784, + null, + null, + 27, + 36, + 27, + 36, + 6, + 7, + true, + "traversal", + "traversal" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 17571912424136369068, + 13732941392666588532, + null, + null, + 44, + 56, + 44, + 56, + 9, + 11, + true, + "direct graph", + "direct graph" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8759553427650775934, + 8729620688739718345, + null, + null, + 51, + 66, + 51, + 66, + 10, + 13, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 3503811091434006699, + 9904713603809864760, + null, + null, + 57, + 66, + 57, + 66, + 12, + 13, + true, + "traversal", + "traversal" + ], + [ + "sentence", + "proper", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 5996076621080095124, + 3025184828607773624, + null, + null, + 68, + 188, + 68, + 188, + 14, + 49, + true, + "As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$.", + "As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$." + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 16381206568246674273, + 6949217655613054178, + null, + null, + 84, + 90, + 84, + 90, + 17, + 18, + true, + "detail", + "detail" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8106478708629288965, + 2634479629892977717, + null, + null, + 94, + 101, + 94, + 101, + 19, + 20, + true, + "section", + "section" + ], + [ + "numval", + "fval", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 12178341415896435198, + 9356552374251491539, + null, + null, + 102, + 105, + 102, + 105, + 20, + 23, + true, + "3.1", + "3.1" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13752318599956892986, + 3243882999611675116, + null, + null, + 137, + 167, + 137, + 167, + 30, + 33, + true, + "straightforward SpMV operation", + "straightforward SpMV operation" + ], + [ + "expression", + "wtoken-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104159258693175, + 3850990531940834900, + null, + null, + 170, + 177, + 170, + 177, + 34, + 41, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625537321081, + 15921994273926457966, + null, + null, + 176, + 180, + 176, + 180, + 40, + 42, + true, + "= Av", + "= Av" + ], + [ + "expression", + "latex", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625699793568, + 15897512725555629958, + null, + null, + 181, + 187, + 181, + 187, + 42, + 48, + true, + "^{!}", + "$^{!}$" + ], + [ + "sentence", + "proper", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 4548432471191064907, + 12405546864064907829, + null, + null, + 189, + 274, + 189, + 274, + 49, + 66, + true, + "In more advanced types of graph-traversals, we evaluate all paths of different depth.", + "In more advanced types of graph-traversals, we evaluate all paths of different depth." + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13549848866814318649, + 12951100156455938138, + null, + null, + 197, + 211, + 197, + 211, + 51, + 53, + true, + "advanced types", + "advanced types" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 10308187620027892234, + 17660718396182283452, + null, + null, + 215, + 231, + 215, + 231, + 54, + 57, + true, + "graph-traversals", + "graph-traversals" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104159211247965, + 3851831851148412490, + null, + null, + 215, + 220, + 215, + 220, + 54, + 55, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8619280805974492668, + 1246303562062216555, + null, + null, + 221, + 231, + 221, + 231, + 56, + 57, + true, + "traversals", + "traversals" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104161667979410, + 16219622563067219904, + null, + null, + 249, + 254, + 249, + 254, + 61, + 62, + true, + "paths", + "paths" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13127417780813530133, + 2668820547192862622, + null, + null, + 258, + 273, + 258, + 273, + 63, + 65, + true, + "different depth", + "different depth" + ], + [ + "sentence", + "proper", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13078819215206981832, + 1971017990059557300, + null, + null, + 275, + 486, + 275, + 486, + 66, + 102, + true, + "Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length.", + "Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length." + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 16381206574973295053, + 8613944306628715549, + null, + null, + 285, + 291, + 285, + 291, + 68, + 69, + true, + "number", + "number" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104161667979410, + 16219622563067217673, + null, + null, + 295, + 300, + 295, + 300, + 70, + 71, + true, + "paths", + "paths" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 329104161758737773, + 16234752642064308276, + null, + null, + 316, + 321, + 316, + 321, + 73, + 74, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 13972526853646866432, + 4963666646089781896, + null, + null, + 360, + 370, + 360, + 370, + 79, + 80, + true, + "pathlength", + "pathlength" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 4603153860084293890, + 10724656896814481236, + null, + null, + 398, + 410, + 398, + 410, + 85, + 86, + true, + "contribution", + "contribution" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625632305102, + 15909948299731138978, + null, + null, + 419, + 423, + 419, + 423, + 88, + 89, + true, + "path", + "path" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 2920317602823103143, + 14435126486004632161, + null, + null, + 449, + 466, + 449, + 466, + 94, + 96, + true, + "inverse factorial", + "inverse factorial" + ], + [ + "expression", + "word-concatenation", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 18223709631064383906, + 5290334765848251647, + null, + null, + 474, + 485, + 474, + 485, + 98, + 101, + true, + "path-length", + "path-length" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625632305102, + 15909948299731130338, + null, + null, + 474, + 478, + 474, + 478, + 98, + 99, + true, + "path", + "path" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 16381206590668214829, + 8983667839008968622, + null, + null, + 479, + 485, + 479, + 485, + 100, + 101, + true, + "length", + "length" + ], + [ + "sentence", + "improper", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 7579237777413581592, + 7101335723397854438, + null, + null, + 487, + 580, + 487, + 580, + 102, + 121, + true, + "For example, consider the case in which we want to explore deeper, indirect paths as follows,", + "For example, consider the case in which we want to explore deeper, indirect paths as follows," + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8106397496085150773, + 11486711446788774948, + null, + null, + 491, + 498, + 491, + 498, + 103, + 104, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 389609625695123443, + 15908991454686209555, + null, + null, + 513, + 517, + 513, + 517, + 107, + 108, + true, + "case", + "case" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 12062282599681290620, + 16899929540755782838, + null, + null, + 554, + 568, + 554, + 568, + 116, + 118, + true, + "indirect paths", + "indirect paths" + ], + [ + "term", + "single-term", + 3785875504044487339, + "TEXT", + "#/texts/102", + 1.0, + 8106397733466170068, + 4491208394922089960, + null, + null, + 572, + 579, + 572, + 579, + 119, + 120, + true, + "follows", + "follows" + ], + [ + "sentence", + "improper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235223, + 9989301221673871682, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "w", + "w" + ], + [ + "sentence", + "proper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 9916410401532570841, + 4842910269063890136, + null, + null, + 2, + 21, + 2, + 21, + 1, + 14, + true, + "$^{!}$= A + A 2 2 !", + "$^{!}$= A + A 2 2 !" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 329104159258693175, + 3768331475560236011, + null, + null, + 2, + 9, + 2, + 9, + 1, + 8, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235162, + 9989301225055039953, + null, + null, + 16, + 17, + 16, + 17, + 11, + 12, + true, + "2", + "2" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235162, + 9989301225055040085, + null, + null, + 18, + 19, + 18, + 19, + 12, + 13, + true, + "2", + "2" + ], + [ + "sentence", + "improper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235139, + 9989301225025680993, + null, + null, + 22, + 23, + 22, + 23, + 14, + 15, + true, + "+", + "+" + ], + [ + "sentence", + "proper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 8106351318766820018, + 16049833510509929686, + null, + null, + 24, + 31, + 24, + 31, + 15, + 19, + true, + "A 3 3 !", + "A 3 3 !" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235163, + 9989301225441111510, + null, + null, + 26, + 27, + 26, + 27, + 16, + 17, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235163, + 9989301225441111382, + null, + null, + 28, + 29, + 28, + 29, + 17, + 18, + true, + "3", + "3" + ], + [ + "sentence", + "improper", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 3035011012940480021, + 1832428019455168426, + null, + null, + 32, + 144, + 32, + 142, + 19, + 83, + true, + "+ GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", + "+ GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540821317, + null, + null, + 34, + 43, + 34, + 43, + 20, + 25, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833998387, + null, + null, + 41, + 42, + 41, + 42, + 23, + 24, + true, + "1", + "1" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540821195, + null, + null, + 44, + 53, + 44, + 53, + 25, + 30, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833995448, + null, + null, + 51, + 52, + 51, + 52, + 28, + 29, + true, + "1", + "1" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540889198, + null, + null, + 54, + 63, + 54, + 63, + 30, + 35, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833985318, + null, + null, + 61, + 62, + 61, + 62, + 33, + 34, + true, + "1", + "1" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 7116489890516677091, + 1239928655295932073, + null, + null, + 64, + 74, + 64, + 74, + 35, + 40, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 15441160910541481862, + 7426216222773784579, + null, + null, + 71, + 73, + 71, + 73, + 38, + 39, + true, + "18", + "18" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 7116489890516677417, + 1239930097162833493, + null, + null, + 75, + 85, + 75, + 85, + 40, + 45, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 15441160910541481863, + 7426216222719391073, + null, + null, + 82, + 84, + 82, + 84, + 43, + 44, + true, + "19", + "19" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 329104159258693175, + 3768331475560262129, + null, + null, + 88, + 95, + 88, + 95, + 46, + 53, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 16381206564827819341, + 4168246366157895749, + null, + null, + 96, + 104, + 96, + 104, + 53, + 61, + true, + "e^{A}-", + "e$^{A}$-" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 12178341415896413249, + 17313632338592011779, + null, + null, + 103, + 106, + 103, + 106, + 60, + 62, + true, + "- 1", + "- 1" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763647, + 13661890647980804945, + null, + null, + 107, + 116, + 107, + 116, + 62, + 67, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235160, + 9989301226364846173, + null, + null, + 114, + 115, + 114, + 115, + 65, + 66, + true, + "0", + "0" + ], + [ + "expression", + "wtoken-concatenation", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 2902585676651763710, + 13661890687540890061, + null, + null, + 117, + 126, + 117, + 126, + 67, + 72, + true, + "GLYPH", + "GLYPH" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235161, + 9989301227833984724, + null, + null, + 124, + 125, + 124, + 125, + 70, + 71, + true, + "1", + "1" + ], + [ + "expression", + "latex", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 389609625699793568, + 263634606544655319, + null, + null, + 129, + 135, + 129, + 135, + 73, + 79, + true, + "^{!}", + "$^{!}$" + ], + [ + "numval", + "ival", + 12105626155924658285, + "TEXT", + "#/texts/103", + 1.0, + 17767354399704235156, + 9989301228016144908, + null, + null, + 140, + 141, + 139, + 140, + 81, + 82, + true, + "4", + "4" + ], + [ + "sentence", + "proper", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 4946211207219256029, + 13284838307535775434, + null, + null, + 0, + 152, + 0, + 150, + 0, + 47, + true, + "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$.", + "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$." + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 17844194112438609771, + 15245159028300247470, + null, + null, + 12, + 24, + 12, + 24, + 3, + 5, + true, + "generic case", + "generic case" + ], + [ + "expression", + "word-concatenation", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 8759553427650775934, + 13977638772783275859, + null, + null, + 28, + 43, + 28, + 43, + 7, + 10, + true, + "graph-traversal", + "graph-traversal" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 329104159211247965, + 1909177009317667114, + null, + null, + 28, + 33, + 28, + 33, + 7, + 8, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 3503811091434006699, + 1552593513615382485, + null, + null, + 34, + 43, + 34, + 43, + 9, + 10, + true, + "traversal", + "traversal" + ], + [ + "expression", + "word-concatenation", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 4667405858993953327, + 7138937013884052228, + null, + null, + 79, + 94, + 79, + 94, + 17, + 20, + true, + "matrix-function", + "matrix-function" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 16381206594266103973, + 8606053226958932503, + null, + null, + 79, + 85, + 79, + 85, + 17, + 18, + true, + "matrix", + "matrix" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 14637915316557309079, + 16851262163263336248, + null, + null, + 86, + 94, + 86, + 94, + 19, + 20, + true, + "function", + "function" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 389609625699630670, + 11988322654688297783, + null, + null, + 109, + 113, + 109, + 113, + 23, + 24, + true, + "edge", + "edge" + ], + [ + "expression", + "wtoken-concatenation", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 329104159258693175, + 1907154720488094232, + null, + null, + 126, + 133, + 126, + 133, + 29, + 36, + true, + "^{!}=", + "$^{!}$=" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 16381206524802769002, + 6972047781742433048, + null, + null, + 132, + 139, + 132, + 138, + 35, + 38, + true, + "= fA \u00f0", + "= fA \u00f0" + ], + [ + "expression", + "latex", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 389609625699793568, + 11988523914287915245, + null, + null, + 145, + 151, + 143, + 149, + 40, + 46, + true, + "^{!}", + "$^{!}$" + ], + [ + "sentence", + "proper", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 3911818568884640949, + 14572114789426270089, + null, + null, + 153, + 307, + 151, + 305, + 47, + 72, + true, + "As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", + "As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion." + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 16381206568246674273, + 7876224071511699486, + null, + null, + 169, + 175, + 167, + 173, + 50, + 51, + true, + "detail", + "detail" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 12580512760652482076, + 9613461018338631967, + null, + null, + 179, + 192, + 177, + 190, + 52, + 54, + true, + "previous work", + "previous work" + ], + [ + "numval", + "ival", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 17767354399704235162, + 13895472510679550781, + null, + null, + 194, + 195, + 192, + 193, + 55, + 56, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 389609625631434316, + 11975495498267754916, + null, + null, + 201, + 205, + 199, + 203, + 57, + 58, + true, + "type", + "type" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 6167836358624304835, + 5859531764025592016, + null, + null, + 209, + 218, + 207, + 216, + 59, + 60, + true, + "operation", + "operation" + ], + [ + "term", + "single-term", + 16265612055607243129, + "TEXT", + "#/texts/104", + 1.0, + 17218927816364445558, + 9299012655407568424, + null, + null, + 266, + 306, + 264, + 304, + 67, + 71, + true, + "recursive Chebyshev polynomial expansion", + "recursive Chebyshev polynomial expansion" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/105", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "sentence", + "improper", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 6188098459342469819, + 1229703042810128321, + null, + null, + 0, + 26, + 0, + 26, + 0, + 8, + true, + "3.3.3 | Logical operations", + "3.3.3 | Logical operations" + ], + [ + "expression", + "wtoken-concatenation", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 329104147725285869, + 6000044661942170615, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "3.3.3", + "3.3.3" + ], + [ + "numval", + "fval", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 12178341415896435196, + 4867750156681578759, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 17767354399704235163, + 11397855393475351535, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "3", + "3" + ], + [ + "term", + "single-term", + 10252446451495472512, + "TEXT", + "#/texts/106", + 1.0, + 17545402118559791717, + 17555948970743190738, + null, + null, + 6, + 26, + 6, + 26, + 5, + 8, + true, + "| Logical operations", + "| Logical operations" + ], + [ + "sentence", + "proper", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 13700534978823339273, + 12250932271253598025, + null, + null, + 0, + 115, + 0, + 115, + 0, + 22, + true, + "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector.", + "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector." + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12603609256967955544, + 14772344826157711306, + null, + null, + 3, + 21, + 3, + 21, + 1, + 3, + true, + "logical operations", + "logical operations" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 389609625741077841, + 8558423680807701295, + null, + null, + 27, + 31, + 27, + 31, + 5, + 6, + true, + "sets", + "sets" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 329104161758737773, + 11460579442964916464, + null, + null, + 35, + 40, + 35, + 40, + 7, + 8, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12178341415895638602, + 6980157083956599502, + null, + null, + 71, + 74, + 71, + 74, + 13, + 14, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 7596240835069815859, + 12179457947178679624, + null, + null, + 103, + 114, + 103, + 114, + 19, + 21, + true, + "node vector", + "node vector" + ], + [ + "sentence", + "proper", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 7959509938832284547, + 7438636696455487498, + null, + null, + 116, + 176, + 116, + 176, + 22, + 36, + true, + "There are three common logical operations, AND, OR, and NOT.", + "There are three common logical operations, AND, OR, and NOT." + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 14800086467850479009, + 666210018065836720, + null, + null, + 132, + 157, + 132, + 157, + 25, + 28, + true, + "common logical operations", + "common logical operations" + ], + [ + "term", + "enum-term-mark-4", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 11299968147221621659, + 15638039005966470642, + null, + null, + 164, + 175, + 164, + 175, + 31, + 35, + true, + "OR, and NOT", + "OR, and NOT" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 15441160910541487730, + 17782216520369344466, + null, + null, + 164, + 166, + 164, + 166, + 31, + 32, + true, + "OR", + "OR" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12178341415896300384, + 6980219868635577240, + null, + null, + 172, + 175, + 172, + 175, + 34, + 35, + true, + "NOT", + "NOT" + ], + [ + "sentence", + "proper", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 2530535679015163989, + 6499975118547970703, + null, + null, + 177, + 310, + 177, + 310, + 36, + 60, + true, + "In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors.", + "In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors." + ], + [ + "term", + "enum-term-mark-4", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 5900106061036628893, + 4204146521886515958, + null, + null, + 184, + 194, + 184, + 194, + 38, + 41, + true, + "AND and OR", + "AND and OR" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12178341415896229184, + 6980242431802160591, + null, + null, + 184, + 187, + 184, + 187, + 38, + 39, + true, + "AND", + "AND" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 12821814845233359770, + 1370605997523919099, + null, + null, + 192, + 205, + 192, + 205, + 40, + 42, + true, + "OR operations", + "OR operations" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 14773391768607445380, + 7907749020990852481, + null, + null, + 239, + 254, + 239, + 254, + 49, + 51, + true, + "arithmetic mean", + "arithmetic mean" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 15262760339251519687, + 2135796427551055674, + null, + null, + 277, + 294, + 277, + 294, + 54, + 56, + true, + "pairwise elements", + "pairwise elements" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 8106477900816818323, + 7610138008398569534, + null, + null, + 302, + 309, + 302, + 309, + 58, + 59, + true, + "vectors", + "vectors" + ], + [ + "sentence", + "proper", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 11716731942292146940, + 10918583807814825475, + null, + null, + 311, + 390, + 311, + 390, + 60, + 77, + true, + "In the NOT operation, we inverse the sign for each element of the input vector.", + "In the NOT operation, we inverse the sign for each element of the input vector." + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 17380026057076513286, + 8554846137731019782, + null, + null, + 318, + 331, + 318, + 331, + 62, + 64, + true, + "NOT operation", + "NOT operation" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 389609625741040683, + 8558425378506358436, + null, + null, + 348, + 352, + 348, + 352, + 68, + 69, + true, + "sign", + "sign" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 8106397492274286821, + 4903176674517740239, + null, + null, + 362, + 369, + 362, + 369, + 71, + 72, + true, + "element", + "element" + ], + [ + "term", + "single-term", + 17011944206067158637, + "TEXT", + "#/texts/107", + 1.0, + 785590888379155985, + 2070684625654949058, + null, + null, + 377, + 389, + 377, + 389, + 74, + 76, + true, + "input vector", + "input vector" + ], + [ + "sentence", + "improper", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 4767177430745297228, + 228154443239687699, + null, + null, + 0, + 27, + 0, + 27, + 0, + 8, + true, + "3.3.4 | Transform functions", + "3.3.4 | Transform functions" + ], + [ + "expression", + "wtoken-concatenation", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 329104147725285868, + 17145181082057860493, + null, + null, + 0, + 5, + 0, + 5, + 0, + 5, + true, + "3.3.4", + "3.3.4" + ], + [ + "numval", + "fval", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 12178341415896435196, + 4375676351556568035, + null, + null, + 0, + 3, + 0, + 3, + 0, + 3, + true, + "3.3", + "3.3" + ], + [ + "numval", + "ival", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 17767354399704235156, + 14141377842797647357, + null, + null, + 4, + 5, + 4, + 5, + 4, + 5, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 16289627123982758705, + "TEXT", + "#/texts/108", + 1.0, + 13342194518649961055, + 15246182238421227996, + null, + null, + 6, + 27, + 6, + 27, + 5, + 8, + true, + "| Transform functions", + "| Transform functions" + ], + [ + "sentence", + "proper", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 15673589737287090621, + 17018937476478673316, + null, + null, + 0, + 82, + 0, + 82, + 0, + 13, + true, + "Lastly, we implement operations which transform the weights associated with nodes.", + "Lastly, we implement operations which transform the weights associated with nodes." + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 13985988710970420061, + 13844832953337165230, + null, + null, + 21, + 31, + 21, + 31, + 4, + 5, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 8106477822555716423, + 4302380438101543009, + null, + null, + 52, + 59, + 52, + 59, + 8, + 9, + true, + "weights", + "weights" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 329104161758737773, + 15575547173408857515, + null, + null, + 76, + 81, + 76, + 81, + 11, + 12, + true, + "nodes", + "nodes" + ], + [ + "sentence", + "proper", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 415786532727651604, + 2431945995463542775, + null, + null, + 83, + 172, + 83, + 172, + 13, + 27, + true, + "One such operation renormalizes and ultimately ranks the nodes according to their weight.", + "One such operation renormalizes and ultimately ranks the nodes according to their weight." + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 13828980233091888506, + 10657123071909144340, + null, + null, + 83, + 114, + 83, + 114, + 13, + 17, + true, + "One such operation renormalizes", + "One such operation renormalizes" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 329104161758737773, + 15575547173408845563, + null, + null, + 140, + 145, + 140, + 145, + 21, + 22, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 13969801897340997317, + "TEXT", + "#/texts/109", + 1.0, + 16381206557786164800, + 15950847297401313251, + null, + null, + 165, + 171, + 165, + 171, + 25, + 26, + true, + "weight", + "weight" + ], + [ + "sentence", + "proper", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 17111569020893923564, + 7584218824091880092, + null, + null, + 0, + 137, + 0, + 137, + 0, + 25, + true, + "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs.", + "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs." + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159243796903, + 10102864649349257834, + null, + null, + 16, + 21, + 16, + 21, + 3, + 4, + true, + "types", + "types" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 13985988710970420061, + 5670507251515585408, + null, + null, + 25, + 35, + 25, + 35, + 5, + 6, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 18146432382445665275, + 8129649123858642143, + null, + null, + 52, + 64, + 52, + 64, + 10, + 12, + true, + "rich queries", + "rich queries" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14314461436358843828, + 3961703421592680698, + null, + null, + 75, + 92, + 75, + 92, + 14, + 16, + true, + "complex questions", + "complex questions" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 16086744441680563418, + 6576900302010150786, + null, + null, + 109, + 124, + 109, + 124, + 20, + 22, + true, + "multiple inputs", + "multiple inputs" + ], + [ + "term", + "enum-term-mark-3", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 4974816129218667479, + 1616253342736872326, + null, + null, + 118, + 136, + 118, + 136, + 21, + 24, + true, + "inputs and outputs", + "inputs and outputs" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8106342536052271615, + 12336710488765885535, + null, + null, + 129, + 136, + 129, + 136, + 23, + 24, + true, + "outputs", + "outputs" + ], + [ + "sentence", + "proper", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 1177385013172704664, + 14024608104153249268, + null, + null, + 138, + 209, + 138, + 209, + 25, + 39, + true, + "Let us now discuss how a workflow is evaluated within the graph engine.", + "Let us now discuss how a workflow is evaluated within the graph engine." + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14638857990842534974, + 12699876069210148656, + null, + null, + 163, + 171, + 163, + 171, + 31, + 32, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 2924972194163802578, + 2750704591142470583, + null, + null, + 196, + 208, + 196, + 208, + 36, + 38, + true, + "graph engine", + "graph engine" + ], + [ + "sentence", + "proper", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8042820535480076507, + 16114465682263414482, + null, + null, + 210, + 291, + 210, + 291, + 39, + 54, + true, + "Once a workflow has been submitted, each worktask is initially assigned a vector.", + "Once a workflow has been submitted, each worktask is initially assigned a vector." + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14638857990842534974, + 12699876069210239434, + null, + null, + 217, + 225, + 217, + 225, + 41, + 42, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14638857990856728723, + 12699698953372660440, + null, + null, + 251, + 259, + 251, + 259, + 47, + 48, + true, + "worktask", + "worktask" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 16381206519458118578, + 17808566753953437795, + null, + null, + 284, + 290, + 284, + 290, + 52, + 53, + true, + "vector", + "vector" + ], + [ + "sentence", + "proper", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8483744196766237297, + 6529174387420215088, + null, + null, + 292, + 354, + 292, + 354, + 54, + 79, + true, + "These vectors are all initialized to zero (v $^{!}$$_{i}$= 0).", + "These vectors are all initialized to zero (v $^{!}$$_{i}$= 0)." + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 8106477900816818323, + 3863078491683433197, + null, + null, + 298, + 305, + 298, + 305, + 55, + 56, + true, + "vectors", + "vectors" + ], + [ + "parenthesis", + "round brackets", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 5941512262948363295, + 9796117134063746060, + null, + null, + 334, + 353, + 334, + 353, + 61, + 78, + true, + "(v $^{!}$$_{i}$= 0)", + "(v $^{!}$$_{i}$= 0)" + ], + [ + "expression", + "wtoken-concatenation", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 5948620232447446819, + 12075094026550463871, + null, + null, + 337, + 350, + 337, + 350, + 63, + 76, + true, + "^{!}_{i}=", + "$^{!}$$_{i}$=" + ], + [ + "numval", + "ival", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 17767354399704235160, + 3668124634718140630, + null, + null, + 351, + 352, + 351, + 352, + 76, + 77, + true, + "0", + "0" + ], + [ + "sentence", + "proper", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 13788972938660668664, + 2865802558503613028, + null, + null, + 355, + 453, + 355, + 453, + 79, + 99, + true, + "Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel.", + "Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel." + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 389609625695751254, + 2338768517388150094, + null, + null, + 355, + 359, + 355, + 359, + 79, + 80, + true, + "Next", + "Next" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159211247965, + 10104926125817128652, + null, + null, + 365, + 370, + 365, + 370, + 82, + 83, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 12178341415896112046, + 6992158052106598564, + null, + null, + 388, + 391, + 388, + 391, + 86, + 87, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 3534171294115941544, + 16274811017124843027, + null, + null, + 395, + 404, + 395, + 404, + 88, + 89, + true, + "worktasks", + "worktasks" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159214088329, + 10104410304708834324, + null, + null, + 424, + 429, + 424, + 429, + 92, + 93, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14814034872218884114, + 4906660160194336109, + null, + null, + 444, + 452, + 444, + 452, + 97, + 98, + true, + "parallel", + "parallel" + ], + [ + "sentence", + "proper", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 5892389074642583888, + 1449681128272710361, + null, + null, + 454, + 623, + 454, + 623, + 99, + 133, + true, + "This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel.", + "This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel." + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 124916246655760082, + 14997524246937848977, + null, + null, + 487, + 503, + 487, + 503, + 105, + 107, + true, + "topological sort", + "topological sort" + ], + [ + "expression", + "word-concatenation", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 1526275179175870585, + 7011924518277257184, + null, + null, + 510, + 521, + 510, + 521, + 108, + 111, + true, + "depth-first", + "depth-first" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104162100250438, + 9265356014683234279, + null, + null, + 510, + 515, + 510, + 515, + 108, + 109, + true, + "depth", + "depth" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 3298392333786674498, + 16997622168685105694, + null, + null, + 516, + 531, + 516, + 531, + 110, + 112, + true, + "first traversal", + "first traversal" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 389609625633315922, + 2353303435662040762, + null, + null, + 548, + 552, + 548, + 552, + 116, + 117, + true, + "list", + "list" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 389609625698616944, + 2338675321444319215, + null, + null, + 567, + 571, + 567, + 571, + 120, + 121, + true, + "item", + "item" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 12178341415895638602, + 6992187631556280274, + null, + null, + 577, + 580, + 577, + 580, + 123, + 124, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 329104159214088329, + 10104410304708836508, + null, + null, + 584, + 589, + 584, + 589, + 125, + 126, + true, + "tasks", + "tasks" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 14814034872218884114, + 4906660160194342930, + null, + null, + 614, + 622, + 614, + 622, + 131, + 132, + true, + "parallel", + "parallel" + ], + [ + "sentence", + "proper", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 856669285578903305, + 9015668049869296425, + null, + null, + 624, + 691, + 624, + 691, + 133, + 144, + true, + "The graph engine then proceeds with the parallel task computations.", + "The graph engine then proceeds with the parallel task computations." + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 2924972194163802578, + 2750704591142518027, + null, + null, + 628, + 640, + 628, + 640, + 134, + 136, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 105697770555684555, + "TEXT", + "#/texts/110", + 1.0, + 16741233145656393762, + 10469151420604399424, + null, + null, + 664, + 690, + 664, + 690, + 140, + 143, + true, + "parallel task computations", + "parallel task computations" + ], + [ + "sentence", + "proper", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 7660575871008244103, + 2316439382736820559, + null, + null, + 0, + 133, + 0, + 133, + 0, + 24, + true, + "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector.", + "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector." + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625631210899, + 12733607242456046210, + null, + null, + 9, + 13, + 9, + 13, + 2, + 3, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 12178341415895638602, + 12747294058064521499, + null, + null, + 27, + 30, + 27, + 30, + 7, + 8, + true, + "set", + "set" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161758737773, + 2433416455752408490, + null, + null, + 34, + 39, + 34, + 39, + 9, + 10, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 127759864276493913, + 8364393908486708964, + null, + null, + 45, + 66, + 45, + 66, + 11, + 13, + true, + "corresponding weights", + "corresponding weights" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 42253711713484855, + 11311968747640274305, + null, + null, + 86, + 102, + 86, + 102, + 16, + 18, + true, + "nonzero elements", + "nonzero elements" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 7596240835069815859, + 6506102061445914504, + null, + null, + 121, + 132, + 121, + 132, + 21, + 23, + true, + "node vector", + "node vector" + ], + [ + "sentence", + "proper", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 6713072116150697613, + 11925397580613056643, + null, + null, + 134, + 266, + 134, + 266, + 24, + 49, + true, + "After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights.", + "After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights." + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 6060214652839025266, + 11966075192922756464, + null, + null, + 154, + 167, + 154, + 167, + 27, + 29, + true, + "full workflow", + "full workflow" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625631210899, + 12733607242456045036, + null, + null, + 198, + 202, + 198, + 202, + 35, + 36, + true, + "task", + "task" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625633315922, + 12733621748595533263, + null, + null, + 205, + 209, + 205, + 209, + 37, + 38, + true, + "list", + "list" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161758737773, + 2433416455752447655, + null, + null, + 213, + 218, + 213, + 218, + 39, + 40, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 8106477822555716423, + 7780647823283838272, + null, + null, + 258, + 265, + 258, + 265, + 47, + 48, + true, + "weights", + "weights" + ], + [ + "sentence", + "proper", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 8216591865615385703, + 5126502945309751658, + null, + null, + 267, + 333, + 267, + 333, + 49, + 64, + true, + "The higher the weight of the node, the more relevant this node is.", + "The higher the weight of the node, the more relevant this node is." + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 16381206557786164800, + 733620420485756914, + null, + null, + 282, + 288, + 282, + 288, + 52, + 53, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625621164460, + 12733685650958048817, + null, + null, + 296, + 300, + 296, + 300, + 55, + 56, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 389609625621164460, + 12733685650958002482, + null, + null, + 325, + 329, + 325, + 329, + 61, + 62, + true, + "node", + "node" + ], + [ + "sentence", + "proper", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 12108525545318378453, + 12728621031671223359, + null, + null, + 334, + 420, + 334, + 420, + 64, + 82, + true, + "As such, we can also retrace which nodes were important in each stage of the workflow.", + "As such, we can also retrace which nodes were important in each stage of the workflow." + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161758737773, + 2433416455752429456, + null, + null, + 369, + 374, + 369, + 374, + 72, + 73, + true, + "nodes", + "nodes" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 329104161640029084, + 2392075965011744506, + null, + null, + 398, + 403, + 398, + 403, + 77, + 78, + true, + "stage", + "stage" + ], + [ + "term", + "single-term", + 15938840672015995359, + "TEXT", + "#/texts/111", + 1.0, + 14638857990842534974, + 9783826650048006204, + null, + null, + 411, + 419, + 411, + 419, + 80, + 81, + true, + "workflow", + "workflow" + ], + [ + "numval", + "ival", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 17767354399704235156, + 6951916224121472658, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "4", + "4" + ], + [ + "sentence", + "improper", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 3669348819955245594, + 11254032301165265326, + null, + null, + 2, + 31, + 2, + 31, + 1, + 6, + true, + "| CLOUD DESIGN AND DEPLOYMENT", + "| CLOUD DESIGN AND DEPLOYMENT" + ], + [ + "term", + "enum-term-mark-4", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 5437625579903233791, + 18074126645980437463, + null, + null, + 4, + 31, + 4, + 31, + 2, + 6, + true, + "CLOUD DESIGN AND DEPLOYMENT", + "CLOUD DESIGN AND DEPLOYMENT" + ], + [ + "term", + "single-term", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 11753857894419936394, + 17603578644174785442, + null, + null, + 4, + 16, + 4, + 16, + 2, + 4, + true, + "CLOUD DESIGN", + "CLOUD DESIGN" + ], + [ + "term", + "single-term", + 16505790528099785698, + "TEXT", + "#/texts/112", + 1.0, + 7198623583390732929, + 15068365172116912497, + null, + null, + 21, + 31, + 21, + 31, + 5, + 6, + true, + "DEPLOYMENT", + "DEPLOYMENT" + ], + [ + "sentence", + "proper", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1636228667275136893, + 6829252488313690685, + null, + null, + 0, + 93, + 0, + 93, + 0, + 15, + true, + "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes.", + "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes." + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 5618072291515280850, + 3895024354767273975, + null, + null, + 4, + 29, + 4, + 29, + 1, + 4, + true, + "primary deployment target", + "primary deployment target" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 12178341415896222428, + 885100098113248695, + null, + null, + 38, + 41, + 38, + 41, + 6, + 7, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 8924667775203066445, + 876400847254491566, + null, + null, + 47, + 64, + 47, + 64, + 9, + 11, + true, + "cloud environment", + "cloud environment" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 7094347613010931393, + 12496963414215338837, + null, + null, + 82, + 92, + 82, + 92, + 13, + 14, + true, + "Kubernetes", + "Kubernetes" + ], + [ + "sentence", + "proper", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1358397889873033065, + 11303318607487088116, + null, + null, + 94, + 188, + 94, + 188, + 15, + 32, + true, + "We package the full platform assets with a Helm chart for quick deployment on multiple setups.", + "We package the full platform assets with a Helm chart for quick deployment on multiple setups." + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1819480924935159279, + 6577470514585879465, + null, + null, + 109, + 129, + 109, + 129, + 18, + 21, + true, + "full platform assets", + "full platform assets" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 4638979376537582648, + 17377998509573799350, + null, + null, + 137, + 147, + 137, + 147, + 23, + 25, + true, + "Helm chart", + "Helm chart" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 11418810401555064622, + 17261720540066472098, + null, + null, + 152, + 168, + 152, + 168, + 26, + 28, + true, + "quick deployment", + "quick deployment" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 16086744429721457666, + 17482235260769052298, + null, + null, + 172, + 187, + 172, + 187, + 29, + 31, + true, + "multiple setups", + "multiple setups" + ], + [ + "sentence", + "proper", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 1429830794837597235, + 13476385461814770755, + null, + null, + 189, + 334, + 189, + 334, + 32, + 66, + true, + "For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", + "For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes." + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 8106397496085150773, + 17340139994990751517, + null, + null, + 193, + 200, + 193, + 200, + 33, + 34, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 14814125365076808131, + 7721049804809485492, + null, + null, + 226, + 234, + 226, + 234, + 39, + 40, + true, + "platform", + "platform" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 6560670568286016569, + 16077107774923354521, + null, + null, + 242, + 251, + 242, + 251, + 42, + 44, + true, + "IBM Cloud", + "IBM Cloud" + ], + [ + "expression", + "word-concatenation", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 14042857724397157868, + 7723400572482154889, + null, + null, + 255, + 265, + 255, + 265, + 45, + 48, + true, + "on-premise", + "on-premise" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 14654282197483479279, + 6319409503642338229, + null, + null, + 257, + 265, + 257, + 265, + 46, + 48, + true, + "-premise", + "-premise" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 11202420113292414044, + 3143243574265492705, + null, + null, + 272, + 298, + 272, + 298, + 50, + 54, + true, + "IBM Cloud Private instance", + "IBM Cloud Private instance" + ], + [ + "expression", + "wtoken-concatenation", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 8106351670279655367, + 7496390199284522039, + null, + null, + 308, + 315, + 308, + 315, + 57, + 61, + true, + "x86-and", + "x86-and" + ], + [ + "numval", + "ival", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 15441160910541481039, + 13036410263911933256, + null, + null, + 309, + 311, + 309, + 311, + 58, + 59, + true, + "86", + "86" + ], + [ + "expression", + "word-concatenation", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 13984993501352220634, + 15736780337580684731, + null, + null, + 316, + 327, + 316, + 327, + 61, + 64, + true, + "POWER-based", + "POWER-based" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 329104162118942300, + 6643300276023836651, + null, + null, + 316, + 321, + 316, + 321, + 61, + 62, + true, + "POWER", + "POWER" + ], + [ + "term", + "single-term", + 14738723905055920039, + "TEXT", + "#/texts/113", + 1.0, + 329104161758737773, + 6632913905614199357, + null, + null, + 328, + 333, + 328, + 333, + 64, + 65, + true, + "nodes", + "nodes" + ], + [ + "sentence", + "proper", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 9940486682659996162, + 7460915234096730767, + null, + null, + 0, + 60, + 0, + 60, + 0, + 16, + true, + "In Figure 5, we show the high-level cloud design of the CPS.", + "In Figure 5, we show the high-level cloud design of the CPS." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206514091025767, + 16142590003889368025, + null, + null, + 3, + 9, + 3, + 9, + 1, + 2, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 17767354399704235157, + 8852681642826623127, + null, + null, + 10, + 11, + 10, + 11, + 2, + 3, + true, + "5", + "5" + ], + [ + "expression", + "word-concatenation", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 15927123524139923400, + 1285026772075360147, + null, + null, + 25, + 35, + 25, + 35, + 7, + 10, + true, + "high-level", + "high-level" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 18177564421258906850, + 13663617698585510754, + null, + null, + 30, + 48, + 30, + 48, + 9, + 12, + true, + "level cloud design", + "level cloud design" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896222428, + 11806956420576415143, + null, + null, + 56, + 59, + 56, + 59, + 14, + 15, + true, + "CPS", + "CPS" + ], + [ + "sentence", + "proper", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12549929982155038286, + 8880624972218295958, + null, + null, + 61, + 253, + 61, + 253, + 16, + 47, + true, + "The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs.", + "The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14814125365076808131, + 10951131595958552084, + null, + null, + 65, + 73, + 65, + 73, + 17, + 18, + true, + "platform", + "platform" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 15626222303133683856, + 15883470027027509782, + null, + null, + 110, + 127, + 110, + 127, + 24, + 26, + true, + "corpus processing", + "corpus processing" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 11015260948057552074, + 17580405417276948030, + null, + null, + 133, + 152, + 133, + 152, + 28, + 30, + true, + "multitenant fashion", + "multitenant fashion" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 5768949209294808779, + 6442116341236141760, + null, + null, + 174, + 212, + 174, + 212, + 36, + 40, + true, + "multiple knowledge ingestion pipelines", + "multiple knowledge ingestion pipelines" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 1523072489578460107, + 18058886189458919981, + null, + null, + 227, + 252, + 227, + 252, + 43, + 46, + true, + "multiple knowledge graphs", + "multiple knowledge graphs" + ], + [ + "sentence", + "proper", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12471785161413140906, + 13074093811019311665, + null, + null, + 254, + 435, + 254, + 435, + 47, + 86, + true, + "We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS).", + "We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS)." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625632190829, + 14110524954774214321, + null, + null, + 267, + 271, + 267, + 271, + 50, + 51, + true, + "unit", + "unit" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8279239668964789243, + 8460390903338403472, + null, + null, + 274, + 295, + 274, + 295, + 52, + 55, + true, + "Knowledge Graph Space", + "Knowledge Graph Space" + ], + [ + "parenthesis", + "round brackets", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 329104053572287295, + 8245453909560823187, + null, + null, + 296, + 301, + 296, + 301, + 55, + 58, + true, + "(KGS)", + "(KGS)" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896254082, + 11806952638868048232, + null, + null, + 297, + 300, + 297, + 300, + 56, + 57, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 9231360134118267820, + 4162022022779188999, + null, + null, + 323, + 341, + 323, + 341, + 63, + 65, + true, + "dedicated instance", + "dedicated instance" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 2924972194163802578, + 17588639801993416525, + null, + null, + 349, + 361, + 349, + 361, + 67, + 69, + true, + "graph engine", + "graph engine" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 5728204332742385536, + 4123494469593944677, + null, + null, + 365, + 391, + 365, + 391, + 71, + 74, + true, + "dedicated MongoDB database", + "dedicated MongoDB database" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206569515593601, + 14517360063753258776, + null, + null, + 398, + 404, + 398, + 404, + 76, + 77, + true, + "bucket", + "bucket" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8770614496471502937, + 13041000514974023818, + null, + null, + 410, + 428, + 410, + 428, + 79, + 82, + true, + "cloud object store", + "cloud object store" + ], + [ + "parenthesis", + "round brackets", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 329104053210133820, + 9015135107673804315, + null, + null, + 429, + 434, + 429, + 434, + 82, + 85, + true, + "(COS)", + "(COS)" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896222365, + 11806957828061927556, + null, + null, + 430, + 433, + 430, + 433, + 83, + 84, + true, + "COS", + "COS" + ], + [ + "sentence", + "proper", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 9535520267303438664, + 1803249748326675225, + null, + null, + 436, + 522, + 436, + 522, + 86, + 102, + true, + "A dashboard allows each project owner to manage the access and the usage of resources.", + "A dashboard allows each project owner to manage the access and the usage of resources." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 6165495539515404595, + 4111005573482539739, + null, + null, + 438, + 447, + 438, + 447, + 87, + 88, + true, + "dashboard", + "dashboard" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14090783558906999057, + 12854087600048197361, + null, + null, + 460, + 473, + 460, + 473, + 90, + 92, + true, + "project owner", + "project owner" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206568743641958, + 14433515699806996137, + null, + null, + 488, + 494, + 488, + 494, + 95, + 96, + true, + "access", + "access" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 329104159157898666, + 13068121140499570811, + null, + null, + 503, + 508, + 503, + 508, + 98, + 99, + true, + "usage", + "usage" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 6168338487309432467, + 10223983486064089367, + null, + null, + 512, + 521, + 512, + 521, + 100, + 101, + true, + "resources", + "resources" + ], + [ + "sentence", + "proper", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 2728497743282239280, + 9042362260686479835, + null, + null, + 523, + 621, + 523, + 621, + 102, + 119, + true, + "The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster.", + "The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896254082, + 11806952638868069314, + null, + null, + 527, + 530, + 527, + 530, + 103, + 104, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 2183265494021555923, + 18896001468121266, + null, + null, + 552, + 568, + 552, + 568, + 108, + 110, + true, + "multiple flavors", + "multiple flavors" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 13498654401954296707, + 13294723622435080231, + null, + null, + 594, + 605, + 594, + 605, + 114, + 115, + true, + "utilization", + "utilization" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8106398485449787361, + 1185060867801324202, + null, + null, + 613, + 620, + 613, + 620, + 117, + 118, + true, + "cluster", + "cluster" + ], + [ + "sentence", + "proper", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 18079656800177842635, + 3681146273233398143, + null, + null, + 622, + 760, + 622, + 760, + 119, + 142, + true, + "These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs.", + "These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 8106397453898023678, + 14703729743636443765, + null, + null, + 628, + 635, + 628, + 635, + 120, + 121, + true, + "flavors", + "flavors" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14387950977550393964, + 17001907116386229501, + null, + null, + 649, + 664, + 649, + 664, + 124, + 126, + true, + "virtual machine", + "virtual machine" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 7252014402665196659, + 11294021744305298020, + null, + null, + 670, + 682, + 670, + 682, + 127, + 129, + true, + "small amount", + "small amount" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16381206567042997791, + 14500738963527654425, + null, + null, + 686, + 692, + 686, + 692, + 130, + 131, + true, + "memory", + "memory" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 4316842714742551971, + 6737419106451654162, + null, + null, + 698, + 717, + 698, + 717, + 133, + 136, + true, + "full dedicated node", + "full dedicated node" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 6527251669177900630, + 6862896336097821307, + null, + null, + 728, + 749, + 728, + 749, + 137, + 139, + true, + "hardware acceleration", + "hardware acceleration" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625538377862, + 14063335216330195650, + null, + null, + 755, + 759, + 755, + 759, + 140, + 141, + true, + "GPUs", + "GPUs" + ], + [ + "sentence", + "proper", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 16727938760460489694, + 682464437352803246, + null, + null, + 761, + 847, + 761, + 847, + 142, + 161, + true, + "Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", + "Once a KGS is created, it can be paused and rescaled without loss of data or downtime." + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 12178341415896254082, + 11806952638868086348, + null, + null, + 768, + 771, + 768, + 771, + 144, + 145, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625633531326, + 14110520053719926279, + null, + null, + 822, + 826, + 822, + 826, + 155, + 156, + true, + "loss", + "loss" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 389609625696431489, + 14108321371888967520, + null, + null, + 830, + 834, + 830, + 834, + 157, + 158, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 5699550326698755904, + "TEXT", + "#/texts/114", + 1.0, + 14650400971381441271, + 15853770385546829868, + null, + null, + 838, + 846, + 838, + 846, + 159, + 160, + true, + "downtime", + "downtime" + ], + [ + "sentence", + "proper", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 3277226034499692930, + 8257764068713046586, + null, + null, + 0, + 111, + 0, + 111, + 0, + 20, + true, + "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution.", + "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution." + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 14857819661511796263, + 2424012575699788348, + null, + null, + 8, + 28, + 8, + 28, + 2, + 5, + true, + "KG creation pipeline", + "KG creation pipeline" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 10007149380535166539, + 1692146958599972346, + null, + null, + 48, + 75, + 48, + 75, + 9, + 12, + true, + "asynchronous compute scheme", + "asynchronous compute scheme" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 10465352779691143250, + 3951289883803806710, + null, + null, + 98, + 110, + 98, + 110, + 17, + 19, + true, + "CCS solution", + "CCS solution" + ], + [ + "numval", + "ival", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 17767354399704235161, + 9537684729007623, + null, + null, + 112, + 113, + 112, + 113, + 20, + 21, + true, + "1", + "1" + ], + [ + "sentence", + "proper", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 17127552860842301864, + 8770197111614698857, + null, + null, + 114, + 256, + 114, + 256, + 21, + 47, + true, + "The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend.", + "The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend." + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 16381206550376895780, + 14690993036419098898, + null, + null, + 118, + 124, + 118, + 124, + 22, + 23, + true, + "system", + "system" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 389609625632179162, + 1006423631488985152, + null, + null, + 143, + 147, + 143, + 147, + 27, + 28, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 11572792171430282447, + 1021301182691757038, + null, + null, + 155, + 167, + 155, + 167, + 30, + 32, + true, + "API frontend", + "API frontend" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 14878173643529361829, + 14456811929852907504, + null, + null, + 194, + 209, + 194, + 209, + 36, + 38, + true, + "compute workers", + "compute workers" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 15654770817534947005, + 10287959561185320802, + null, + null, + 220, + 234, + 220, + 234, + 40, + 42, + true, + "message broker", + "message broker" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 6406733444931989980, + 2171075967878720181, + null, + null, + 241, + 255, + 241, + 255, + 44, + 46, + true, + "result backend", + "result backend" + ], + [ + "sentence", + "proper", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8169839874033251818, + 4419439272810104045, + null, + null, + 257, + 366, + 257, + 366, + 47, + 70, + true, + "The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs.", + "The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs." + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8106478059506484182, + 11556117617919976622, + null, + null, + 261, + 268, + 261, + 268, + 48, + 49, + true, + "workers", + "workers" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 389609625696431489, + 1125351843508795170, + null, + null, + 284, + 288, + 284, + 288, + 52, + 53, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 16772942504422841315, + 12869590969867328268, + null, + null, + 311, + 325, + 311, + 325, + 59, + 61, + true, + "NoSQL database", + "NoSQL database" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8770614496471502937, + 4425801382089339238, + null, + null, + 332, + 350, + 332, + 350, + 63, + 66, + true, + "cloud object store", + "cloud object store" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 5326252220577355949, + 11291971315896721582, + null, + null, + 355, + 365, + 355, + 365, + 67, + 69, + true, + "data blobs", + "data blobs" + ], + [ + "sentence", + "proper", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 5715822533324454462, + 3737780764433609993, + null, + null, + 367, + 477, + 367, + 477, + 70, + 89, + true, + "These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", + "These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform." + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 8106478059506484182, + 11556117617919928746, + null, + null, + 373, + 380, + 373, + 380, + 71, + 72, + true, + "workers", + "workers" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 4086536176752834180, + 4868222512876603371, + null, + null, + 411, + 429, + 411, + 429, + 77, + 79, + true, + "cloud orchestrator", + "cloud orchestrator" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 5679217233562387039, + 12739262502871868839, + null, + null, + 448, + 460, + 448, + 460, + 83, + 85, + true, + "current load", + "current load" + ], + [ + "term", + "single-term", + 11609131422778723150, + "TEXT", + "#/texts/115", + 1.0, + 14814125365076808131, + 16428429469880175089, + null, + null, + 468, + 476, + 468, + 476, + 87, + 88, + true, + "platform", + "platform" + ], + [ + "sentence", + "proper", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 3168972455566929645, + 14299943161362031928, + null, + null, + 0, + 103, + 0, + 103, + 0, + 19, + true, + "The processing of the KG creation typically starts with the user submitting the DF to the frontend API.", + "The processing of the KG creation typically starts with the user submitting the DF to the frontend API." + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 14088627147213114570, + 13854182623796006327, + null, + null, + 4, + 14, + 4, + 14, + 1, + 2, + true, + "processing", + "processing" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 2196851006559809291, + 9830489088948642112, + null, + null, + 22, + 33, + 22, + 33, + 4, + 6, + true, + "KG creation", + "KG creation" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 389609625632179162, + 8646106573348311993, + null, + null, + 60, + 64, + 60, + 64, + 10, + 11, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 15441160910541480770, + 13534996573266716272, + null, + null, + 80, + 82, + 80, + 82, + 13, + 14, + true, + "DF", + "DF" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 18368248485730797851, + 14211587989421610259, + null, + null, + 90, + 102, + 90, + 102, + 16, + 18, + true, + "frontend API", + "frontend API" + ], + [ + "sentence", + "proper", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 13906228526952191149, + 3075418305413472422, + null, + null, + 104, + 301, + 104, + 301, + 19, + 56, + true, + "The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks.", + "The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks." + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 12178341415896112046, + 4574774009824171048, + null, + null, + 108, + 111, + 108, + 111, + 20, + 21, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 13985988710970420061, + 12935943121770737018, + null, + null, + 115, + 125, + 115, + 125, + 22, + 23, + true, + "operations", + "operations" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 1686262582468728546, + 18069301562578694550, + null, + null, + 166, + 182, + 166, + 182, + 30, + 32, + true, + "previous section", + "previous section" + ], + [ + "expression", + "word-concatenation", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 12091012031128966489, + 4422884735184098490, + null, + null, + 187, + 199, + 187, + 199, + 33, + 36, + true, + "fine-grained", + "fine-grained" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 1807540568077309272, + 15117894055096245192, + null, + null, + 192, + 205, + 192, + 205, + 35, + 37, + true, + "grained tasks", + "grained tasks" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 16381206570348587859, + 1833011693751406972, + null, + null, + 227, + 233, + 227, + 233, + 41, + 42, + true, + "broker", + "broker" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 8106397496085150773, + 11380064558136438485, + null, + null, + 239, + 246, + 239, + 246, + 44, + 45, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 7803735128811820247, + 8028207541414251541, + null, + null, + 252, + 264, + 252, + 264, + 47, + 49, + true, + "whole corpus", + "whole corpus" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 5635562936121152164, + 16688760703459948603, + null, + null, + 277, + 300, + 277, + 300, + 52, + 55, + true, + "many independent chunks", + "many independent chunks" + ], + [ + "sentence", + "proper", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 3333414693963617816, + 12948913819424524581, + null, + null, + 302, + 404, + 302, + 404, + 56, + 75, + true, + "The user receives an overall status from the API and is notified when the DF processing has completed.", + "The user receives an overall status from the API and is notified when the DF processing has completed." + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 389609625632179162, + 8646106573348360699, + null, + null, + 306, + 310, + 306, + 310, + 57, + 58, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 8842823732426861704, + 17629510401470609093, + null, + null, + 323, + 337, + 323, + 337, + 60, + 62, + true, + "overall status", + "overall status" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 12178341415896230896, + 4574893999665149592, + null, + null, + 347, + 350, + 347, + 350, + 64, + 65, + true, + "API", + "API" + ], + [ + "term", + "single-term", + 788128893109726279, + "TEXT", + "#/texts/116", + 1.0, + 16299080740739724047, + 11080747030069412617, + null, + null, + 376, + 389, + 376, + 389, + 70, + 72, + true, + "DF processing", + "DF processing" + ], + [ + "sentence", + "proper", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14300472229212663005, + 14392697989286948225, + null, + null, + 0, + 120, + 0, + 120, + 0, + 25, + true, + "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS.", + "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS." + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106395850436073177, + 11012666495072522005, + null, + null, + 4, + 11, + 4, + 11, + 1, + 3, + true, + "KG data", + "KG data" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12787341663997074868, + 2266405206495988392, + null, + null, + 42, + 59, + 42, + 59, + 7, + 9, + true, + "storage solutions", + "storage solutions" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16772942504422841315, + 2277994577118317964, + null, + null, + 63, + 77, + 63, + 77, + 11, + 13, + true, + "NoSQL database", + "NoSQL database" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16918978243275188323, + 1110270592458797827, + null, + null, + 81, + 101, + 81, + 101, + 15, + 18, + true, + "cloud object storage", + "cloud object storage" + ], + [ + "parenthesis", + "round brackets", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 329104053210133820, + 10787425083780243479, + null, + null, + 102, + 107, + 102, + 107, + 18, + 21, + true, + "(COS)", + "(COS)" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896222365, + 3228078444842065286, + null, + null, + 103, + 106, + 103, + 106, + 19, + 20, + true, + "COS", + "COS" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896254082, + 3228102135893419567, + null, + null, + 116, + 119, + 116, + 119, + 23, + 24, + true, + "KGS", + "KGS" + ], + [ + "sentence", + "proper", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12082365070187585802, + 16167088524264065499, + null, + null, + 121, + 273, + 121, + 273, + 25, + 55, + true, + "Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph.", + "Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph." + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625621164460, + 11938016511442844888, + null, + null, + 126, + 130, + 126, + 130, + 26, + 27, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14650401089286948001, + 9314496952887964373, + null, + null, + 151, + 159, + 151, + 159, + 31, + 32, + true, + "document", + "document" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16772942504422841315, + 2277994577118313177, + null, + null, + 165, + 179, + 165, + 179, + 34, + 36, + true, + "NoSQL database", + "NoSQL database" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14088628410271132453, + 4341732016105186977, + null, + null, + 203, + 213, + 203, + 213, + 40, + 41, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625621164460, + 11938016511442851462, + null, + null, + 230, + 234, + 230, + 234, + 44, + 45, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106397496085150773, + 2009680286188387580, + null, + null, + 240, + 247, + 240, + 247, + 47, + 48, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625631325904, + 11937730708172995984, + null, + null, + 253, + 257, + 253, + 257, + 50, + 51, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 6169141668427353082, + 14431783315368920148, + null, + null, + 263, + 272, + 263, + 272, + 53, + 54, + true, + "paragraph", + "paragraph" + ], + [ + "sentence", + "proper", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 610319572008007953, + 6746694152812115550, + null, + null, + 274, + 393, + 274, + 393, + 55, + 83, + true, + "If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS.", + "If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS." + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 11152620526374970124, + 14811456031713141955, + null, + null, + 288, + 301, + 288, + 301, + 59, + 61, + true, + "binary object", + "binary object" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 389609625621164460, + 11938016511442849064, + null, + null, + 318, + 322, + 318, + 322, + 64, + 65, + true, + "node", + "node" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106397496085150773, + 2009680286188378662, + null, + null, + 328, + 335, + 328, + 335, + 67, + 68, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12366808243217836777, + 5730782230103722615, + null, + null, + 341, + 353, + 341, + 353, + 70, + 72, + true, + "PDF document", + "PDF document" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 329104161828335551, + 12330380347516247933, + null, + null, + 360, + 365, + 360, + 365, + 74, + 75, + true, + "image", + "image" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896222365, + 3228078444843066386, + null, + null, + 389, + 392, + 389, + 392, + 81, + 82, + true, + "COS", + "COS" + ], + [ + "sentence", + "proper", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 4344494126371972185, + 2350781277598128576, + null, + null, + 394, + 572, + 394, + 572, + 83, + 115, + true, + "The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", + "The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search." + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 12178341415896254082, + 3228102135893405214, + null, + null, + 398, + 401, + 398, + 401, + 84, + 85, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 7129359759299976443, + 13989298049334872036, + null, + null, + 420, + 439, + 420, + 439, + 88, + 90, + true, + "minimal information", + "minimal information" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 8106477782290185579, + 19841994463607863, + null, + null, + 462, + 469, + 462, + 469, + 94, + 95, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 11015713444890684392, + 5016937512725344760, + null, + null, + 484, + 496, + 484, + 496, + 100, + 101, + true, + "connectivity", + "connectivity" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 329104159211247965, + 4925265353295993783, + null, + null, + 504, + 509, + 504, + 509, + 103, + 104, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 14088628410271132453, + 4341732016105174192, + null, + null, + 518, + 528, + 518, + 528, + 106, + 107, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 7029344862946908483, + "TEXT", + "#/texts/117", + 1.0, + 16381206577802837709, + 3639100746832045022, + null, + null, + 565, + 571, + 565, + 571, + 113, + 114, + true, + "search", + "search" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/118", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 2144926686518491811, + "TEXT", + "#/texts/119", + 1.0, + 15441160910541481983, + 7629680595941988994, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "11", + "11" + ], + [ + "sentence", + "improper", + 2144926686518491811, + "TEXT", + "#/texts/119", + 1.0, + 15441160910541485670, + 7629680776796918969, + null, + null, + 2, + 4, + 2, + 4, + 1, + 2, + true, + "of", + "of" + ], + [ + "numval", + "ival", + 2144926686518491811, + "TEXT", + "#/texts/119", + 1.0, + 15441160910541481979, + 7629680596056147236, + null, + null, + 4, + 6, + 4, + 6, + 2, + 3, + true, + "15", + "15" + ], + [ + "sentence", + "proper", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 16074792491060269581, + 16392047884217890893, + null, + null, + 0, + 126, + 0, + 126, + 0, + 24, + true, + "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources.", + "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources." + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 12178341415896254082, + 3344072087811986116, + null, + null, + 4, + 7, + 4, + 7, + 1, + 2, + true, + "KGS", + "KGS" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 389609625632179162, + 8614714760573598927, + null, + null, + 26, + 30, + 26, + 30, + 6, + 7, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 14652188385274391907, + 2694673140760145603, + null, + null, + 37, + 45, + 37, + 45, + 9, + 11, + true, + "REST API", + "REST API" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106478445190161533, + 4934944743075669717, + null, + null, + 73, + 80, + 73, + 80, + 16, + 17, + true, + "results", + "results" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 9625448109592502994, + 9647746636491739728, + null, + null, + 100, + 125, + 100, + 125, + 20, + 23, + true, + "different storage sources", + "different storage sources" + ], + [ + "sentence", + "proper", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 13174820656604838662, + 16260063485294524084, + null, + null, + 127, + 252, + 127, + 252, + 24, + 44, + true, + "To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally.", + "To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally." + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 6709594336853371761, + 11687052192579585167, + null, + null, + 137, + 155, + 137, + 155, + 26, + 28, + true, + "decent performance", + "decent performance" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106477782290185579, + 12680231174549344170, + null, + null, + 169, + 176, + 169, + 176, + 30, + 31, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 17200993861033027072, + 1744565019688702260, + null, + null, + 180, + 194, + 180, + 194, + 32, + 34, + true, + "multiple users", + "multiple users" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 2924972194163802578, + 10534198242190806077, + null, + null, + 200, + 212, + 200, + 212, + 36, + 38, + true, + "graph engine", + "graph engine" + ], + [ + "sentence", + "proper", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 15983440293168997448, + 7963308543571437549, + null, + null, + 253, + 354, + 253, + 354, + 44, + 61, + true, + "Most workflow queries execute fast enough such that they can be responded from a synchronous request.", + "Most workflow queries execute fast enough such that they can be responded from a synchronous request." + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 16360526185581748651, + 15299875967681492548, + null, + null, + 253, + 274, + 253, + 274, + 44, + 47, + true, + "Most workflow queries", + "Most workflow queries" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 7499883450733855176, + 12045430344624045691, + null, + null, + 334, + 353, + 334, + 353, + 58, + 60, + true, + "synchronous request", + "synchronous request" + ], + [ + "sentence", + "proper", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 2443264249188056591, + 8310367412232015326, + null, + null, + 355, + 460, + 355, + 460, + 61, + 79, + true, + "Others, especially the graph analytics computations, are more expensive and return large amounts of data.", + "Others, especially the graph analytics computations, are more expensive and return large amounts of data." + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 16381206477857958587, + 4024277227926320818, + null, + null, + 355, + 361, + 355, + 361, + 61, + 62, + true, + "Others", + "Others" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 9728303362208378504, + 11302837117618091413, + null, + null, + 378, + 406, + 378, + 406, + 65, + 68, + true, + "graph analytics computations", + "graph analytics computations" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 11805639520798919476, + 7644867692249185725, + null, + null, + 438, + 451, + 438, + 451, + 74, + 76, + true, + "large amounts", + "large amounts" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 389609625696431489, + 8612777845572934547, + null, + null, + 455, + 459, + 455, + 459, + 77, + 78, + true, + "data", + "data" + ], + [ + "sentence", + "proper", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 17420955785017825293, + 4530821838279240147, + null, + null, + 461, + 596, + 461, + 596, + 79, + 103, + true, + "Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", + "Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion." + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106477782290185579, + 12680231174549324694, + null, + null, + 473, + 480, + 473, + 480, + 82, + 83, + true, + "queries", + "queries" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 18427810349028856651, + 2258223692032184753, + null, + null, + 505, + 521, + 505, + 521, + 87, + 89, + true, + "asynchronous API", + "asynchronous API" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 8106478445190161533, + 4934944743075576931, + null, + null, + 530, + 537, + 530, + 537, + 91, + 92, + true, + "results", + "results" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 389609625632179162, + 8614714760573567106, + null, + null, + 577, + 581, + 577, + 581, + 99, + 100, + true, + "user", + "user" + ], + [ + "term", + "single-term", + 18333396269095847693, + "TEXT", + "#/texts/120", + 1.0, + 2703018890303469599, + 9579262064613450677, + null, + null, + 585, + 595, + 585, + 595, + 101, + 102, + true, + "completion", + "completion" + ], + [ + "numval", + "ival", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 17767354399704235157, + 11518089933568466075, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "5", + "5" + ], + [ + "sentence", + "improper", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 2621907744440686475, + 9056515438346593466, + null, + null, + 2, + 39, + 2, + 39, + 1, + 9, + true, + "| CASE STUDY: OIL AND GAS EXPLORATION", + "| CASE STUDY: OIL AND GAS EXPLORATION" + ], + [ + "term", + "single-term", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 250883940722560618, + 5731782570955531308, + null, + null, + 4, + 14, + 4, + 14, + 2, + 4, + true, + "CASE STUDY", + "CASE STUDY" + ], + [ + "term", + "enum-term-mark-4", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 18038659283920252343, + 8396203383843088821, + null, + null, + 16, + 39, + 16, + 39, + 5, + 9, + true, + "OIL AND GAS EXPLORATION", + "OIL AND GAS EXPLORATION" + ], + [ + "term", + "single-term", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 12178341415896270517, + 129409062461846188, + null, + null, + 16, + 19, + 16, + 19, + 5, + 6, + true, + "OIL", + "OIL" + ], + [ + "term", + "single-term", + 4030998538427149966, + "TEXT", + "#/texts/121", + 1.0, + 7606713323162423099, + 12398485310842551463, + null, + null, + 24, + 39, + 24, + 39, + 7, + 9, + true, + "GAS EXPLORATION", + "GAS EXPLORATION" + ], + [ + "sentence", + "proper", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8216705235205389735, + 240810668415429324, + null, + null, + 0, + 67, + 0, + 67, + 0, + 13, + true, + "Oil and gas exploration is a complex, technical field of expertise.", + "Oil and gas exploration is a complex, technical field of expertise." + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6389497618120621824, + 3882287786623873346, + null, + null, + 0, + 23, + 0, + 23, + 0, + 4, + true, + "Oil and gas exploration", + "Oil and gas exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12178341415896272573, + 479144706274466459, + null, + null, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "Oil", + "Oil" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10692163443301812358, + 14369438466538696698, + null, + null, + 8, + 23, + 8, + 23, + 2, + 4, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6630151693041027733, + 13667311119554727837, + null, + null, + 38, + 53, + 38, + 53, + 8, + 10, + true, + "technical field", + "technical field" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6168316375272172674, + 7063048620659196911, + null, + null, + 57, + 66, + 57, + 66, + 11, + 12, + true, + "expertise", + "expertise" + ], + [ + "sentence", + "proper", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 11961004859214314258, + 4927401767963333110, + null, + null, + 68, + 278, + 68, + 278, + 13, + 51, + true, + "Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures.", + "Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures." + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 389609625696431489, + 7981605993880509721, + null, + null, + 87, + 91, + 87, + 91, + 16, + 17, + true, + "data", + "data" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 4574553070187117214, + 5477991605964996477, + null, + null, + 95, + 120, + 95, + 120, + 18, + 21, + true, + "many geological processes", + "many geological processes" + ], + [ + "term", + "enum-term-mark-3", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 4820933926787397751, + 317282169431161814, + null, + null, + 111, + 133, + 111, + 133, + 20, + 23, + true, + "processes and entities", + "processes and entities" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 14652256560445338257, + 6970476607163866107, + null, + null, + 125, + 133, + 125, + 133, + 22, + 23, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6165495739559760741, + 2196775454065369079, + null, + null, + 154, + 163, + 154, + 163, + 26, + 27, + true, + "databases", + "databases" + ], + [ + "parenthesis", + "round brackets", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7373849855264861449, + 365116357501177202, + null, + null, + 164, + 188, + 164, + 188, + 27, + 32, + true, + "(public and proprietary)", + "(public and proprietary)" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106398483106473371, + 17672855256048813131, + null, + null, + 193, + 200, + 193, + 200, + 33, + 34, + true, + "corpora", + "corpora" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6167933651658664291, + 15693796346909339457, + null, + null, + 204, + 213, + 204, + 213, + 35, + 36, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 389609625631325904, + 7977943539407590204, + null, + null, + 252, + 256, + 252, + 256, + 44, + 45, + true, + "text", + "text" + ], + [ + "term", + "enum-term-mark-3", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 9207207424694358454, + 3444507114153246338, + null, + null, + 258, + 277, + 258, + 277, + 46, + 50, + true, + "tables, and figures", + "tables, and figures" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 16381206513098478539, + 16420719447640918384, + null, + null, + 258, + 264, + 258, + 264, + 46, + 47, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106397480533647371, + 10093626472338736862, + null, + null, + 270, + 277, + 270, + 277, + 49, + 50, + true, + "figures", + "figures" + ], + [ + "sentence", + "proper", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12899401129752394980, + 7318808495444355734, + null, + null, + 279, + 367, + 279, + 367, + 51, + 66, + true, + "This is a serious impediment for efficient exploration of new oil and gas opportunities.", + "This is a serious impediment for efficient exploration of new oil and gas opportunities." + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 16857074076267844634, + 10288321581388192914, + null, + null, + 289, + 307, + 289, + 307, + 54, + 56, + true, + "serious impediment", + "serious impediment" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7888665710418232344, + 13630946120734502191, + null, + null, + 312, + 333, + 312, + 333, + 57, + 59, + true, + "efficient exploration", + "efficient exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106342689900901717, + 18192642792638057808, + null, + null, + 337, + 344, + 337, + 344, + 60, + 62, + true, + "new oil", + "new oil" + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 9418848057117014737, + 5402506232376820687, + null, + null, + 341, + 352, + 341, + 352, + 61, + 64, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 17842890634558813266, + 10975865632036036027, + null, + null, + 349, + 366, + 349, + 366, + 63, + 65, + true, + "gas opportunities", + "gas opportunities" + ], + [ + "sentence", + "proper", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 18349903503178560352, + 8780732856130854312, + null, + null, + 368, + 661, + 368, + 655, + 66, + 117, + true, + "For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles.", + "For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles." + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106397496085150773, + 15308866410956677851, + null, + null, + 372, + 379, + 372, + 379, + 67, + 68, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7139567255410303030, + 8446684552119017715, + null, + null, + 381, + 403, + 381, + 403, + 69, + 71, + true, + "geographic information", + "geographic information" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 15928367849318151150, + 6690657827796758810, + null, + null, + 407, + 428, + 407, + 428, + 72, + 74, + true, + "geological structures", + "geological structures" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 9567036524588108536, + 1501013677311244740, + null, + null, + 445, + 461, + 445, + 461, + 78, + 79, + true, + "NaturalEarthData", + "NaturalEarthData" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106398477819293336, + 17658354524441111071, + null, + null, + 485, + 492, + 479, + 486, + 83, + 84, + true, + "history", + "history" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 6172089554353143931, + 17548988686693732639, + null, + null, + 494, + 503, + 488, + 497, + 85, + 86, + true, + "evolution", + "evolution" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 2703018952916355661, + 8527312128484531618, + null, + null, + 509, + 519, + 503, + 513, + 88, + 89, + true, + "components", + "components" + ], + [ + "parenthesis", + "round brackets", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 11638883092501129531, + 10880710974821402837, + null, + null, + 520, + 580, + 514, + 574, + 89, + 104, + true, + "(eg, formations with their age, rock-composition, and depth)", + "(eg, formations with their age, rock-composition, and depth)" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 15441160910541487324, + 12466872848432344092, + null, + null, + 521, + 523, + 515, + 517, + 90, + 91, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 16064217528453934834, + 13674817412400112483, + null, + null, + 525, + 535, + 519, + 529, + 92, + 93, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12178341415895571749, + 479221805921022809, + null, + null, + 547, + 550, + 541, + 544, + 95, + 96, + true, + "age", + "age" + ], + [ + "expression", + "word-concatenation", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10544831253991637042, + 6283171410540301725, + null, + null, + 552, + 568, + 546, + 562, + 97, + 100, + true, + "rock-composition", + "rock-composition" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 389609625632802170, + 7973715862595966605, + null, + null, + 552, + 556, + 546, + 550, + 97, + 98, + true, + "rock", + "rock" + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 17498499536990228010, + 10410600454563159631, + null, + null, + 557, + 579, + 551, + 573, + 99, + 103, + true, + "composition, and depth", + "composition, and depth" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 14749101077007455096, + 4732542352542381958, + null, + null, + 557, + 568, + 551, + 562, + 99, + 100, + true, + "composition", + "composition" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 329104162100250438, + 5041034355187623111, + null, + null, + 574, + 579, + 568, + 573, + 102, + 103, + true, + "depth", + "depth" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106478449073306569, + 15059182269430793968, + null, + null, + 598, + 605, + 592, + 599, + 107, + 108, + true, + "reports", + "reports" + ], + [ + "parenthesis", + "round brackets", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 2706078645353543038, + 3854987420067579305, + null, + null, + 606, + 636, + 600, + 630, + 108, + 113, + true, + "(governmental and proprietary)", + "(governmental and proprietary)" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 7863808487922385366, + 7668215042863067481, + null, + null, + 641, + 660, + 635, + 654, + 114, + 116, + true, + "scientific articles", + "scientific articles" + ], + [ + "sentence", + "proper", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 17752875551499627315, + 2358405120490317342, + null, + null, + 662, + 861, + 656, + 855, + 117, + 154, + true, + "As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", + "As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology." + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 8106397495764760483, + 9633777631454873600, + null, + null, + 671, + 678, + 665, + 672, + 120, + 121, + true, + "experts", + "experts" + ], + [ + "term", + "enum-term-mark-2", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 5515747999597331548, + 985317114331566672, + null, + null, + 682, + 705, + 676, + 699, + 122, + 126, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12178341415895623363, + 479231402707811038, + null, + null, + 682, + 685, + 676, + 679, + 122, + 123, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 10692163443301812358, + 14369438466538523960, + null, + null, + 690, + 705, + 684, + 699, + 124, + 126, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 12462088721494412558, + 9590850160516238357, + null, + null, + 725, + 739, + 719, + 733, + 130, + 132, + true, + "many documents", + "many documents" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 329104161571401725, + 5024703429178195243, + null, + null, + 743, + 748, + 737, + 742, + 133, + 134, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 14388065630035882329, + 13638788162857695945, + null, + null, + 765, + 776, + 759, + 770, + 138, + 139, + true, + "information", + "information" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 3808918567046385591, + 12838318632197561077, + null, + null, + 782, + 805, + 776, + 799, + 141, + 144, + true, + "certain geographic area", + "certain geographic area" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 5432038927924624855, + 3081417618916509100, + null, + null, + 816, + 834, + 810, + 828, + 147, + 149, + true, + "good understanding", + "good understanding" + ], + [ + "term", + "single-term", + 10295608624766759271, + "TEXT", + "#/texts/122", + 1.0, + 15394751475952612729, + 8785520169217661944, + null, + null, + 842, + 860, + 836, + 854, + 151, + 153, + true, + "underlying geology", + "underlying geology" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16452340106229368551, + 11552113153785899906, + null, + null, + 0, + 113, + 0, + 113, + 0, + 20, + true, + "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites.", + "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 8380287248939955654, + 16738377710036347675, + null, + null, + 4, + 14, + 4, + 14, + 1, + 3, + true, + "main tasks", + "main tasks" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 8106397495764760483, + 8300505182841605252, + null, + null, + 22, + 29, + 22, + 29, + 5, + 6, + true, + "experts", + "experts" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 5515747999597331548, + 2684891308460414979, + null, + null, + 41, + 64, + 41, + 64, + 8, + 12, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968123871980, + null, + null, + 41, + 44, + 41, + 44, + 8, + 9, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10692163443301812358, + 13785073584184276748, + null, + null, + 49, + 64, + 49, + 64, + 10, + 12, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10658730123397856737, + 8700089933554137116, + null, + null, + 81, + 112, + 81, + 112, + 15, + 19, + true, + "potential new exploration sites", + "potential new exploration sites" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 11936152223334879196, + 11546009086188025692, + null, + null, + 114, + 185, + 114, + 185, + 20, + 36, + true, + "This is typically done by describing a basin or one of its sub-regions.", + "This is typically done by describing a basin or one of its sub-regions." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159219516222, + 14550289508206518778, + null, + null, + 153, + 158, + 153, + 158, + 27, + 28, + true, + "basin", + "basin" + ], + [ + "expression", + "word-concatenation", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3123226645854154111, + 6696524889970483253, + null, + null, + 173, + 184, + 173, + 184, + 32, + 35, + true, + "sub-regions", + "sub-regions" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895639616, + 674500475604999116, + null, + null, + 173, + 176, + 173, + 176, + 32, + 33, + true, + "sub", + "sub" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 8106478448964548679, + 7931419419855932404, + null, + null, + 177, + 184, + 177, + 184, + 34, + 35, + true, + "regions", + "regions" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6588420048013411488, + 17199279919993921695, + null, + null, + 186, + 377, + 186, + 377, + 36, + 67, + true, + "In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system.", + "In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14814125472896938138, + 12597080905343650882, + null, + null, + 189, + 197, + 189, + 197, + 37, + 38, + true, + "practice", + "practice" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159219516222, + 14550289508206522668, + null, + null, + 214, + 219, + 214, + 219, + 42, + 43, + true, + "basin", + "basin" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159326063388, + 14165173228340727668, + null, + null, + 222, + 227, + 222, + 227, + 44, + 45, + true, + "boils", + "boils" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9648537698556423826, + 6911808250044295400, + null, + null, + 252, + 273, + 252, + 273, + 49, + 51, + true, + "geological formations", + "geological formations" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14088628410271132453, + 4251286936900238374, + null, + null, + 285, + 295, + 285, + 295, + 53, + 54, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104159219516222, + 14550289508206516996, + null, + null, + 303, + 308, + 303, + 308, + 56, + 57, + true, + "basin", + "basin" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16064217528453934834, + 15050166161452124834, + null, + null, + 336, + 346, + 336, + 346, + 61, + 62, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2509987211733796739, + 16811136751141125392, + null, + null, + 360, + 376, + 360, + 376, + 64, + 66, + true, + "petroleum system", + "petroleum system" + ], + [ + "numval", + "ival", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15441160910541481863, + 7242502688177594361, + null, + null, + 378, + 380, + 378, + 380, + 67, + 68, + true, + "19", + "19" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3041813118097378718, + 5477849685609028942, + null, + null, + 381, + 491, + 381, + 491, + 68, + 90, + true, + "In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal.", + "In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 7395650178474964697, + 4439868107237037266, + null, + null, + 393, + 410, + 393, + 410, + 71, + 73, + true, + "minimalistic form", + "minimalistic form" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2509987211733796739, + 16811136751141124595, + null, + null, + 414, + 430, + 414, + 430, + 75, + 77, + true, + "petroleum system", + "petroleum system" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2703018952916355661, + 8840992498594724587, + null, + null, + 451, + 461, + 451, + 461, + 81, + 82, + true, + "components", + "components" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10890937563763904307, + 10205829752163449428, + null, + null, + 463, + 490, + 463, + 490, + 83, + 89, + true, + "source, reservoir, and seal", + "source, reservoir, and seal" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16381206579112188113, + 14297335677695974861, + null, + null, + 463, + 469, + 463, + 469, + 83, + 84, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734568227, + null, + null, + 471, + 480, + 471, + 480, + 85, + 86, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 389609625741054314, + 410603660043560873, + null, + null, + 486, + 490, + 486, + 490, + 88, + 89, + true, + "seal", + "seal" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 10695328289155597291, + 4431862688730634730, + null, + null, + 492, + 561, + 492, + 561, + 90, + 105, + true, + "The source is the rock formation in which the oil or gas was created.", + "The source is the rock formation in which the oil or gas was created." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16381206579112188113, + 14297335677695972757, + null, + null, + 496, + 502, + 496, + 502, + 91, + 92, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 80968125733518558, + 7018196435505582631, + null, + null, + 510, + 524, + 510, + 524, + 94, + 96, + true, + "rock formation", + "rock formation" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 13992299006832689086, + 10586492155645346827, + null, + null, + 538, + 548, + 538, + 548, + 99, + 102, + true, + "oil or gas", + "oil or gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100857275, + null, + null, + 538, + 541, + 538, + 541, + 99, + 100, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568697250, + null, + null, + 545, + 548, + 545, + 548, + 101, + 102, + true, + "gas", + "gas" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 11895421850198242632, + 5509077001248181418, + null, + null, + 562, + 666, + 562, + 666, + 105, + 127, + true, + "Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas.", + "Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas." + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 13992299006832689086, + 10586492155645246498, + null, + null, + 580, + 590, + 580, + 590, + 109, + 112, + true, + "oil or gas", + "oil or gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100848533, + null, + null, + 580, + 583, + 580, + 583, + 109, + 110, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568698065, + null, + null, + 587, + 590, + 587, + 590, + 111, + 112, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16193957251468249616, + 10512046748046668259, + null, + null, + 615, + 636, + 615, + 636, + 116, + 119, + true, + "porous reservoir rock", + "porous reservoir rock" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9418848057117014737, + 1685050495865720384, + null, + null, + 654, + 665, + 654, + 665, + 123, + 126, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100852102, + null, + null, + 654, + 657, + 654, + 657, + 123, + 124, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568694425, + null, + null, + 662, + 665, + 662, + 665, + 125, + 126, + true, + "gas", + "gas" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 5929789915327270268, + 3037242968728546017, + null, + null, + 667, + 803, + 667, + 803, + 127, + 155, + true, + "In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal.", + "In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 329104161571401725, + 9497325505729384728, + null, + null, + 670, + 675, + 670, + 675, + 128, + 129, + true, + "order", + "order" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9418848057117014737, + 1685050495865971311, + null, + null, + 684, + 695, + 684, + 695, + 131, + 134, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895623363, + 674497968100849695, + null, + null, + 684, + 687, + 684, + 687, + 131, + 132, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12178341415895464135, + 674505039568442669, + null, + null, + 692, + 695, + 692, + 695, + 133, + 134, + true, + "gas", + "gas" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734518002, + null, + null, + 715, + 724, + 715, + 724, + 139, + 140, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 9723882825591180683, + 9574840395838806322, + null, + null, + 751, + 777, + 751, + 777, + 146, + 149, + true, + "impermeable rock formation", + "impermeable rock formation" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 389609625741054314, + 410603660043570043, + null, + null, + 798, + 802, + 798, + 802, + 153, + 154, + true, + "seal", + "seal" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 12828622475591605017, + 5782454436492965846, + null, + null, + 804, + 913, + 804, + 913, + 155, + 176, + true, + "Each one of these components is comprised of one or more formations, with a certain age and rock composition.", + "Each one of these components is comprised of one or more formations, with a certain age and rock composition." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2703018952916355661, + 8840992498595058324, + null, + null, + 822, + 832, + 822, + 832, + 159, + 160, + true, + "components", + "components" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16064217528453934834, + 15050166161452165429, + null, + null, + 861, + 871, + 861, + 871, + 166, + 167, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 18043249325066169556, + 6240050066438421142, + null, + null, + 880, + 891, + 880, + 891, + 170, + 172, + true, + "certain age", + "certain age" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 3561668659819452356, + 7599493799741319648, + null, + null, + 888, + 912, + 888, + 912, + 171, + 175, + true, + "age and rock composition", + "age and rock composition" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 18031444749457032388, + 1223000296858575465, + null, + null, + 896, + 912, + 896, + 912, + 173, + 175, + true, + "rock composition", + "rock composition" + ], + [ + "sentence", + "proper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15154527065859701304, + 10511609858079856946, + null, + null, + 914, + 1162, + 914, + 1162, + 176, + 222, + true, + "To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints.", + "To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints." + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2509987211733796739, + 16811136751140628834, + null, + null, + 928, + 944, + 928, + 944, + 179, + 181, + true, + "petroleum system", + "petroleum system" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15928864167064606327, + 6933788371051918523, + null, + null, + 950, + 975, + 950, + 975, + 183, + 186, + true, + "certain geographical area", + "certain geographical area" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6743492865849365205, + 1774365845575004327, + null, + null, + 995, + 1014, + 995, + 1014, + 192, + 194, + true, + "candidate formation", + "candidate formation" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 5947879501615734370, + 7820482070703241023, + null, + null, + 1024, + 1033, + 1024, + 1033, + 196, + 197, + true, + "component", + "component" + ], + [ + "parenthesis", + "round brackets", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 17270253253682717487, + 3576935745000233213, + null, + null, + 1034, + 1067, + 1034, + 1067, + 197, + 207, + true, + "(ie, reservoir, seal, and source)", + "(ie, reservoir, seal, and source)" + ], + [ + "term", + "enum-term-mark-2", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14224878169535431391, + 4999493091788788351, + null, + null, + 1035, + 1066, + 1035, + 1066, + 198, + 206, + true, + "ie, reservoir, seal, and source", + "ie, reservoir, seal, and source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 15441160910541486545, + 7242502590681773013, + null, + null, + 1035, + 1037, + 1035, + 1037, + 198, + 199, + true, + "ie", + "ie" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734612797, + null, + null, + 1039, + 1048, + 1039, + 1048, + 200, + 201, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 389609625741054314, + 410603660043461418, + null, + null, + 1050, + 1054, + 1050, + 1054, + 202, + 203, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16381206579112188113, + 14297335677695869230, + null, + null, + 1060, + 1066, + 1060, + 1066, + 205, + 206, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 14088628410271132453, + 4251286936900251711, + null, + null, + 1089, + 1099, + 1089, + 1099, + 211, + 212, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2703018952916355661, + 8840992498594647547, + null, + null, + 1109, + 1119, + 1109, + 1119, + 214, + 215, + true, + "components", + "components" + ], + [ + "expression", + "word-concatenation", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 16142298665920251723, + 15780159324829191742, + null, + null, + 1133, + 1149, + 1133, + 1149, + 217, + 220, + true, + "well-established", + "well-established" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 2343820404875251124, + 18235374725016729660, + null, + null, + 1150, + 1161, + 1150, + 1161, + 220, + 221, + true, + "constraints", + "constraints" + ], + [ + "sentence", + "improper", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 7956387888440268806, + 9171487369932444199, + null, + null, + 1163, + 1189, + 1163, + 1189, + 222, + 227, + true, + "For example, the reservoir", + "For example, the reservoir" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 8106397496085150773, + 8292722284435493161, + null, + null, + 1167, + 1174, + 1167, + 1174, + 223, + 224, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 10633780781731536747, + "TEXT", + "#/texts/123", + 1.0, + 6168331670275357579, + 8243421751734523008, + null, + null, + 1180, + 1189, + 1180, + 1189, + 226, + 227, + true, + "reservoir", + "reservoir" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/124", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 1080447728722590413, + "TEXT", + "#/texts/125", + 1.0, + 15441160910541481976, + 12490743152134877753, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "12", + "12" + ], + [ + "sentence", + "improper", + 4361549257087816853, + "TEXT", + "#/texts/126", + 1.0, + 15441160910541485670, + 9983842722140753537, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "of", + "of" + ], + [ + "numval", + "ival", + 4361549257087816853, + "TEXT", + "#/texts/126", + 1.0, + 15441160910541481979, + 9983816787922721487, + null, + null, + 3, + 5, + 3, + 5, + 1, + 2, + true, + "15", + "15" + ], + [ + "sentence", + "improper", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 12390004558367100719, + 10045326222207556847, + null, + null, + 0, + 60, + 0, + 60, + 0, + 12, + true, + "formation has to have a lower depth than the seal formation.", + "formation has to have a lower depth than the seal formation." + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 6187533480885532545, + 7871889926557155024, + null, + null, + 0, + 9, + 0, + 9, + 0, + 1, + true, + "formation", + "formation" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 329104162100250438, + 16247133124509719242, + null, + null, + 30, + 35, + 30, + 35, + 6, + 7, + true, + "depth", + "depth" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 8780157828309296089, + 14417774072283982472, + null, + null, + 45, + 59, + 45, + 59, + 9, + 11, + true, + "seal formation", + "seal formation" + ], + [ + "sentence", + "proper", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 15802078051467200825, + 4593559971105037353, + null, + null, + 61, + 171, + 61, + 171, + 12, + 34, + true, + "Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", + "Another example of such constraints is that the age of the seal and reservoir has to be older than the source." + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 8106397496085150773, + 10113590852616268300, + null, + null, + 69, + 76, + 69, + 76, + 13, + 14, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 13876553311005799387, + 6354882104637684429, + null, + null, + 80, + 96, + 80, + 96, + 15, + 17, + true, + "such constraints", + "such constraints" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 12178341415895571749, + 13993372577473076189, + null, + null, + 109, + 112, + 109, + 112, + 20, + 21, + true, + "age", + "age" + ], + [ + "term", + "enum-term-mark-2", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 16756161176734575985, + 10638546327685693256, + null, + null, + 120, + 138, + 120, + 138, + 23, + 26, + true, + "seal and reservoir", + "seal and reservoir" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 389609625741054314, + 7672668339257182848, + null, + null, + 120, + 124, + 120, + 124, + 23, + 24, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 6168331670275357579, + 17711145650520030642, + null, + null, + 129, + 138, + 129, + 138, + 25, + 26, + true, + "reservoir", + "reservoir" + ], + [ + "term", + "single-term", + 10195664788154887804, + "TEXT", + "#/texts/127", + 1.0, + 16381206579112188113, + 6632699352491893262, + null, + null, + 164, + 170, + 164, + 170, + 32, + 33, + true, + "source", + "source" + ], + [ + "sentence", + "proper", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 1705537891001951581, + 12643174622978093931, + null, + null, + 0, + 141, + 0, + 141, + 0, + 30, + true, + "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives.", + "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives." + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 329104161571401725, + 10954679443872088477, + null, + null, + 3, + 8, + 3, + 8, + 1, + 2, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12779036928191531604, + 2179291459161706341, + null, + null, + 17, + 29, + 17, + 29, + 4, + 6, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "enum-term-mark-2", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 9418848057117014737, + 12374664467097755932, + null, + null, + 42, + 53, + 42, + 53, + 9, + 12, + true, + "oil and gas", + "oil and gas" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12178341415895623363, + 17947552822877265640, + null, + null, + 42, + 45, + 42, + 45, + 9, + 10, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 3400754800868514192, + 11934221638746188525, + null, + null, + 50, + 71, + 50, + 71, + 11, + 13, + true, + "gas explorationalists", + "gas explorationalists" + ], + [ + "expression", + "word-concatenation", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 5576784674045870024, + 13038592085786715991, + null, + null, + 81, + 91, + 81, + 91, + 15, + 20, + true, + "day-to-day", + "day-to-day" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12178341415895453614, + 17965535433478365771, + null, + null, + 81, + 84, + 81, + 84, + 15, + 16, + true, + "day", + "day" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 8106396676567683370, + 16597025298636850613, + null, + null, + 88, + 95, + 88, + 95, + 19, + 21, + true, + "day job", + "day job" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 15868223159689591859, + 14476383799407961998, + null, + null, + 130, + 140, + 130, + 140, + 28, + 29, + true, + "objectives", + "objectives" + ], + [ + "sentence", + "proper", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 6964031938346981228, + 2679069447819445922, + null, + null, + 142, + 230, + 142, + 230, + 30, + 48, + true, + "On the one hand, it needs to create a consistent Knowledge Graph from a document corpus.", + "On the one hand, it needs to create a consistent Knowledge Graph from a document corpus." + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 389609625695385072, + 11440288346750521048, + null, + null, + 153, + 157, + 153, + 157, + 33, + 34, + true, + "hand", + "hand" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 2568402610530935991, + 14140482534604978282, + null, + null, + 180, + 206, + 180, + 206, + 40, + 43, + true, + "consistent Knowledge Graph", + "consistent Knowledge Graph" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 16647308723752369903, + 14738883853389812839, + null, + null, + 214, + 229, + 214, + 229, + 45, + 47, + true, + "document corpus", + "document corpus" + ], + [ + "sentence", + "proper", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 17535387160187070973, + 11137257841253658893, + null, + null, + 231, + 389, + 231, + 389, + 48, + 76, + true, + "This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition).", + "This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition)." + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 5877539623435777295, + 11286491693883501248, + null, + null, + 236, + 251, + 236, + 251, + 49, + 51, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 9648537698556423826, + 6238309560067342359, + null, + null, + 271, + 292, + 271, + 292, + 55, + 57, + true, + "geological formations", + "geological formations" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 10514013392853408912, + 3902356967922595314, + null, + null, + 304, + 325, + 304, + 325, + 59, + 61, + true, + "respective properties", + "respective properties" + ], + [ + "parenthesis", + "round brackets", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 18003855556286774784, + 5570542864698798195, + null, + null, + 326, + 388, + 326, + 388, + 61, + 75, + true, + "(eg, geographical locations, depth, age, and rock composition)", + "(eg, geographical locations, depth, age, and rock composition)" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 15441160910541487324, + 15189263452160795700, + null, + null, + 327, + 329, + 327, + 329, + 62, + 63, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 15051916633168881147, + 4042421737129645550, + null, + null, + 331, + 353, + 331, + 353, + 64, + 66, + true, + "geographical locations", + "geographical locations" + ], + [ + "term", + "enum-term-mark-2", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 6860541340972856268, + 17956047888006168723, + null, + null, + 355, + 387, + 355, + 387, + 67, + 74, + true, + "depth, age, and rock composition", + "depth, age, and rock composition" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 329104162100250438, + 11306285140255820122, + null, + null, + 355, + 360, + 355, + 360, + 67, + 68, + true, + "depth", + "depth" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12178341415895571749, + 17965533017402487143, + null, + null, + 362, + 365, + 362, + 365, + 69, + 70, + true, + "age", + "age" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 18031444749457032388, + 12929696333857038410, + null, + null, + 371, + 387, + 371, + 387, + 72, + 74, + true, + "rock composition", + "rock composition" + ], + [ + "sentence", + "proper", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 14586761392565722761, + 12189326748130292090, + null, + null, + 390, + 596, + 390, + 596, + 76, + 110, + true, + "On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", + "On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates." + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 14046205808324278415, + 9078231314066883308, + null, + null, + 397, + 407, + 397, + 407, + 78, + 80, + true, + "other hand", + "other hand" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 12178341415896222428, + 17965292451507993990, + null, + null, + 409, + 412, + 409, + 412, + 81, + 82, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 2280710246315749839, + 14080640540713416540, + null, + null, + 430, + 450, + 430, + 450, + 85, + 88, + true, + "fast query responses", + "fast query responses" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 18259199261543580951, + 14446619061773297986, + null, + null, + 493, + 513, + 493, + 513, + 95, + 97, + true, + "potential components", + "potential components" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 1727412062449779824, + 6279132683973492637, + null, + null, + 517, + 534, + 517, + 534, + 98, + 100, + true, + "petroleum systems", + "petroleum systems" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 2343820404875251124, + 11932898287921936471, + null, + null, + 549, + 560, + 549, + 560, + 103, + 104, + true, + "constraints", + "constraints" + ], + [ + "term", + "single-term", + 7538054744015619336, + "TEXT", + "#/texts/128", + 1.0, + 14381095619961675188, + 12908833652765352656, + null, + null, + 575, + 595, + 575, + 595, + 107, + 109, + true, + "promising candidates", + "promising candidates" + ], + [ + "sentence", + "proper", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 6825934840363073982, + 5636233081723674917, + null, + null, + 0, + 267, + 0, + 267, + 0, + 48, + true, + "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified.", + "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified." + ], + [ + "term", + "enum-term-mark-2", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 13768691840532369655, + 11193246741347243418, + null, + null, + 11, + 41, + 11, + 41, + 2, + 5, + true, + "development and implementation", + "development and implementation" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 1525875096007260836, + 12171024053502893659, + null, + null, + 11, + 22, + 11, + 22, + 2, + 3, + true, + "development", + "development" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 16770038681622514616, + 13377631723935101023, + null, + null, + 27, + 41, + 27, + 41, + 4, + 5, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 4571012442155812864, + 10278457919107470617, + null, + null, + 45, + 66, + 45, + 66, + 6, + 9, + true, + "custom NLU annotators", + "custom NLU annotators" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12178341415896222428, + 18419530096214942692, + null, + null, + 70, + 73, + 70, + 73, + 10, + 11, + true, + "CPS", + "CPS" + ], + [ + "term", + "enum-term-mark-2", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 5515747999597331548, + 7333264710061334239, + null, + null, + 78, + 101, + 78, + 101, + 12, + 16, + true, + "oil and gas exploration", + "oil and gas exploration" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12178341415895623363, + 18419432310300434890, + null, + null, + 78, + 81, + 78, + 81, + 12, + 13, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 10692163443301812358, + 14500118643984057055, + null, + null, + 86, + 101, + 86, + 101, + 14, + 16, + true, + "gas exploration", + "gas exploration" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 2350671729723156275, + 15558555620402818749, + null, + null, + 107, + 118, + 107, + 118, + 18, + 20, + true, + "client team", + "client team" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 389609625695385072, + 15858014558261910158, + null, + null, + 126, + 130, + 126, + 130, + 21, + 22, + true, + "hand", + "hand" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 389609625695385072, + 15858014558262095076, + null, + null, + 134, + 138, + 134, + 138, + 23, + 24, + true, + "hand", + "hand" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 6793739009103508672, + 6386853708445382627, + null, + null, + 148, + 165, + 148, + 165, + 26, + 29, + true, + "IBM Research team", + "IBM Research team" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12051423055985186664, + 17929859788470785671, + null, + null, + 189, + 207, + 189, + 207, + 34, + 36, + true, + "accuracy benchmark", + "accuracy benchmark" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 10510333910970178582, + 1646640126800442890, + null, + null, + 221, + 237, + 221, + 237, + 39, + 41, + true, + "key capabilities", + "key capabilities" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 12178341415896222428, + 18419530096214998128, + null, + null, + 245, + 248, + 245, + 248, + 43, + 44, + true, + "CPS", + "CPS" + ], + [ + "sentence", + "proper", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 14290172583784870712, + 15927376679214014989, + null, + null, + 268, + 410, + 268, + 410, + 48, + 78, + true, + "The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG.", + "The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG." + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 389609625699055241, + 15857990529270421118, + null, + null, + 272, + 276, + 272, + 276, + 49, + 50, + true, + "goal", + "goal" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 5948160876453582848, + 11544211964230154399, + null, + null, + 284, + 293, + 284, + 293, + 52, + 53, + true, + "benchmark", + "benchmark" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 1949498199725672567, + 18146522528945762773, + null, + null, + 310, + 325, + 310, + 325, + 57, + 59, + true, + "entire pipeline", + "entire pipeline" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 16381206514091025767, + 1446474516282156831, + null, + null, + 338, + 344, + 338, + 344, + 61, + 62, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 17767354399704235158, + 13179516689827493860, + null, + null, + 345, + 346, + 345, + 346, + 62, + 63, + true, + "6", + "6" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 1297756121731734348, + 7733414860626740361, + null, + null, + 362, + 384, + 362, + 384, + 68, + 71, + true, + "PDF document ingestion", + "PDF document ingestion" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 13017693093872726154, + 10279474879437280242, + null, + null, + 397, + 409, + 397, + 409, + 75, + 77, + true, + "queryable KG", + "queryable KG" + ], + [ + "sentence", + "improper", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 15728969365616328105, + 17085098925010968686, + null, + null, + 411, + 460, + 411, + 460, + 78, + 87, + true, + "The key components of this specific pipeline are,", + "The key components of this specific pipeline are," + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 6381120898605443461, + 17504710944887042389, + null, + null, + 415, + 429, + 415, + 429, + 79, + 81, + true, + "key components", + "key components" + ], + [ + "term", + "single-term", + 12426662601736619109, + "TEXT", + "#/texts/129", + 1.0, + 10127059533904232000, + 15955758164972743780, + null, + null, + 438, + 455, + 438, + 455, + 83, + 85, + true, + "specific pipeline", + "specific pipeline" + ], + [ + "numval", + "ival", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 17767354399704235161, + 16668792304570951258, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "1", + "1" + ], + [ + "sentence", + "improper", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 11889799938246613874, + 1416225359394283175, + null, + null, + 1, + 57, + 1, + 57, + 1, + 12, + true, + ". the conversion of PDF documents into JSON through CCS,", + ". the conversion of PDF documents into JSON through CCS," + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 2703018679320364082, + 10926776708418742663, + null, + null, + 7, + 17, + 7, + 17, + 3, + 4, + true, + "conversion", + "conversion" + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 12653831733608918357, + 7130150499537747604, + null, + null, + 21, + 34, + 21, + 34, + 5, + 7, + true, + "PDF documents", + "PDF documents" + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 389609625541450799, + 1148415792138977757, + null, + null, + 40, + 44, + 40, + 44, + 8, + 9, + true, + "JSON", + "JSON" + ], + [ + "term", + "single-term", + 4162783521620221579, + "TEXT", + "#/texts/130", + 1.0, + 12178341415896221596, + 1383368125015642049, + null, + null, + 53, + 56, + 53, + 56, + 10, + 11, + true, + "CCS", + "CCS" + ], + [ + "numval", + "ival", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 17767354399704235162, + 17330663619054335778, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "2", + "2" + ], + [ + "sentence", + "improper", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 10360766580882649633, + 9418275448049882729, + null, + null, + 1, + 65, + 1, + 65, + 1, + 16, + true, + ". the creation of the KG in the CPS from the JSON documents, and", + ". the creation of the KG in the CPS from the JSON documents, and" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 14652282930648707075, + 1296612546179459976, + null, + null, + 7, + 15, + 7, + 15, + 3, + 4, + true, + "creation", + "creation" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 15441160910541480204, + 13433689011768330761, + null, + null, + 23, + 25, + 23, + 25, + 6, + 7, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 12178341415896222428, + 8639440310989100808, + null, + null, + 33, + 36, + 33, + 36, + 9, + 10, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 5135259059216244866, + "TEXT", + "#/texts/131", + 1.0, + 2351632970423386126, + 8632517790462141146, + null, + null, + 46, + 60, + 46, + 60, + 12, + 14, + true, + "JSON documents", + "JSON documents" + ], + [ + "numval", + "ival", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 17767354399704235163, + 14373480556157138435, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "3", + "3" + ], + [ + "sentence", + "improper", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 7192632164357775195, + 2976029623734261195, + null, + null, + 1, + 22, + 1, + 22, + 1, + 6, + true, + ". the querying of the", + ". the querying of the" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 14637920980029577773, + 100201578071126401, + null, + null, + 7, + 15, + 7, + 15, + 3, + 4, + true, + "querying", + "querying" + ], + [ + "sentence", + "proper", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 13977281577432050469, + 10430722213677949058, + null, + null, + 23, + 101, + 23, + 101, + 6, + 19, + true, + "KG served by CPS to identify petroleum systems elements with their properties.", + "KG served by CPS to identify petroleum systems elements with their properties." + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 15441160910541480204, + 3387988993019039764, + null, + null, + 23, + 25, + 23, + 25, + 6, + 7, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 12178341415896222428, + 12208991191022865237, + null, + null, + 36, + 39, + 36, + 39, + 9, + 10, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 715794125007296180, + 12795993613527751497, + null, + null, + 52, + 78, + 52, + 78, + 12, + 15, + true, + "petroleum systems elements", + "petroleum systems elements" + ], + [ + "term", + "single-term", + 16998817296948099535, + "TEXT", + "#/texts/132", + 1.0, + 14088628410271132453, + 11692902669902558965, + null, + null, + 90, + 100, + 90, + 100, + 17, + 18, + true, + "properties", + "properties" + ], + [ + "sentence", + "proper", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 7581777221421061873, + 14978907642458194092, + null, + null, + 0, + 157, + 0, + 154, + 0, + 29, + true, + "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset.", + "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset." + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14105815281071030459, + 8762836771533552880, + null, + null, + 7, + 17, + 7, + 17, + 2, + 3, + true, + "suggestion", + "suggestion" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106397495764760483, + 5542432659059451382, + null, + null, + 25, + 32, + 25, + 32, + 5, + 6, + true, + "experts", + "experts" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 2350671729723156275, + 11828949847746348501, + null, + null, + 40, + 51, + 40, + 51, + 8, + 10, + true, + "client team", + "client team" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 1949498199725672567, + 5862741285729980896, + null, + null, + 57, + 72, + 57, + 72, + 12, + 14, + true, + "entire pipeline", + "entire pipeline" + ], + [ + "numval", + "ival", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625536078676, + 4142990959296314501, + null, + null, + 88, + 92, + 88, + 92, + 18, + 19, + true, + "1051", + "1051" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 6071418746771287636, + 13853529843892164023, + null, + null, + 93, + 117, + 93, + 117, + 19, + 22, + true, + "Field Evaluation Reports", + "Field Evaluation Reports" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16836628269428371418, + 18082386177451915318, + null, + null, + 127, + 141, + 127, + 141, + 24, + 26, + true, + "C&C Reservoirs", + "C&C Reservoirs" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106396676716241904, + 3065370629985052298, + null, + null, + 149, + 156, + 146, + 153, + 27, + 28, + true, + "dataset", + "dataset" + ], + [ + "sentence", + "proper", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5445384521585333988, + 6675391978778171870, + null, + null, + 158, + 259, + 155, + 256, + 29, + 47, + true, + "The advantage of using this dataset for an accuracy benchmark is that each report includes two parts.", + "The advantage of using this dataset for an accuracy benchmark is that each report includes two parts." + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5946904284821171904, + 7296542983005317042, + null, + null, + 162, + 171, + 159, + 168, + 30, + 31, + true, + "advantage", + "advantage" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106396676716241904, + 3065370629985057888, + null, + null, + 186, + 193, + 183, + 190, + 34, + 35, + true, + "dataset", + "dataset" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 12051423055985186664, + 5239327927281905732, + null, + null, + 201, + 219, + 198, + 216, + 37, + 39, + true, + "accuracy benchmark", + "accuracy benchmark" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206521507679731, + 6908716631963244547, + null, + null, + 233, + 239, + 230, + 236, + 42, + 43, + true, + "report", + "report" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 329104161667984155, + 6486912841380755591, + null, + null, + 253, + 258, + 250, + 255, + 45, + 46, + true, + "parts", + "parts" + ], + [ + "sentence", + "proper", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 9483359608935776482, + 14368716291555426974, + null, + null, + 260, + 350, + 257, + 347, + 47, + 64, + true, + "One part is verbose text describing the history, evolution, and composition of the fields.", + "One part is verbose text describing the history, evolution, and composition of the fields." + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14652437434623351008, + 17767392433408645928, + null, + null, + 260, + 268, + 257, + 265, + 47, + 49, + true, + "One part", + "One part" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3514630383766601368, + 16756472043394057148, + null, + null, + 272, + 284, + 269, + 281, + 50, + 52, + true, + "verbose text", + "verbose text" + ], + [ + "term", + "enum-term-mark-2", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3005704999990963973, + 4834246923337609034, + null, + null, + 300, + 335, + 297, + 332, + 54, + 60, + true, + "history, evolution, and composition", + "history, evolution, and composition" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106398477819293336, + 16490512145767497936, + null, + null, + 300, + 307, + 297, + 304, + 54, + 55, + true, + "history", + "history" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 6172089554353143931, + 17169604842729845897, + null, + null, + 309, + 318, + 306, + 315, + 56, + 57, + true, + "evolution", + "evolution" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14749101077007455096, + 15429073757137335434, + null, + null, + 324, + 335, + 321, + 332, + 59, + 60, + true, + "composition", + "composition" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206548906499597, + 366300192401501745, + null, + null, + 343, + 349, + 340, + 346, + 62, + 63, + true, + "fields", + "fields" + ], + [ + "sentence", + "proper", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 11210731674048684277, + 54501834201149568, + null, + null, + 351, + 490, + 348, + 487, + 64, + 86, + true, + "The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline.", + "The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline." + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14639581537964510688, + 9158240592054921045, + null, + null, + 355, + 363, + 352, + 360, + 65, + 66, + true, + "language", + "language" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14203028525020218648, + 7178394498907141554, + null, + null, + 375, + 393, + 372, + 390, + 69, + 71, + true, + "similar complexity", + "similar complexity" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5292062602866596698, + 13702278590678261576, + null, + null, + 397, + 429, + 394, + 426, + 72, + 75, + true, + "standard geological publications", + "standard geological publications" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 420313777628708468, + 3530977542162910034, + null, + null, + 441, + 460, + 438, + 457, + 78, + 80, + true, + "realistic challenge", + "realistic challenge" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14857819661511796263, + 17596023666875428212, + null, + null, + 469, + 489, + 466, + 486, + 82, + 85, + true, + "KG creation pipeline", + "KG creation pipeline" + ], + [ + "sentence", + "proper", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 4149301380723366406, + 2966292617771160741, + null, + null, + 491, + 656, + 488, + 653, + 86, + 116, + true, + "The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties.", + "The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties." + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 2169253085456814700, + 15744012511127381696, + null, + null, + 495, + 506, + 492, + 503, + 87, + 89, + true, + "second part", + "second part" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 12178341415895456504, + 3484447794018666097, + null, + null, + 514, + 517, + 511, + 514, + 91, + 92, + true, + "end", + "end" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206521507679731, + 6908716631964218344, + null, + null, + 526, + 532, + 523, + 529, + 94, + 95, + true, + "report", + "report" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206513098478539, + 624667531677655957, + null, + null, + 549, + 555, + 546, + 552, + 98, + 99, + true, + "tables", + "tables" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625631325904, + 4140854756525230310, + null, + null, + 576, + 580, + 573, + 577, + 102, + 103, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14652262331391540004, + 10527382338693157586, + null, + null, + 600, + 608, + 597, + 605, + 107, + 108, + true, + "elements", + "elements" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 1727412062449779824, + 13214726213044247883, + null, + null, + 616, + 633, + 613, + 630, + 110, + 112, + true, + "petroleum systems", + "petroleum systems" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 14088628410271132453, + 4995216546732910011, + null, + null, + 645, + 655, + 642, + 652, + 114, + 115, + true, + "properties", + "properties" + ], + [ + "sentence", + "proper", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16090785368672247706, + 15812168781097489089, + null, + null, + 657, + 734, + 654, + 731, + 116, + 131, + true, + "Therefore, we ingest these reports into CCS and extract both text and tables.", + "Therefore, we ingest these reports into CCS and extract both text and tables." + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106478449073306569, + 9696718968531674549, + null, + null, + 684, + 691, + 681, + 688, + 121, + 122, + true, + "reports", + "reports" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 12178341415896221596, + 3486979654156411937, + null, + null, + 697, + 700, + 694, + 697, + 123, + 124, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625631325904, + 4140854756525233018, + null, + null, + 718, + 722, + 715, + 719, + 127, + 128, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206513098478539, + 624667531677603449, + null, + null, + 727, + 733, + 724, + 730, + 129, + 130, + true, + "tables", + "tables" + ], + [ + "sentence", + "proper", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3726384155577721767, + 2194562319932255346, + null, + null, + 735, + 923, + 732, + 920, + 131, + 173, + true, + "Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", + "Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark." + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 15441160910541480204, + 3458773221742021286, + null, + null, + 757, + 759, + 754, + 756, + 136, + 137, + true, + "KG", + "KG" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 389609625631325904, + 4140854756525217952, + null, + null, + 774, + 778, + 771, + 775, + 140, + 141, + true, + "text", + "text" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206513098478539, + 624667531677607536, + null, + null, + 795, + 801, + 792, + 798, + 144, + 145, + true, + "tables", + "tables" + ], + [ + "expression", + "word-concatenation", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 3753411203337468488, + 1771253405748692981, + null, + null, + 805, + 817, + 802, + 814, + 146, + 149, + true, + "ground-truth", + "ground-truth" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16381206541509431009, + 464045395139760264, + null, + null, + 805, + 811, + 802, + 808, + 146, + 147, + true, + "ground", + "ground" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 329104159241711235, + 488439069266268963, + null, + null, + 812, + 817, + 809, + 814, + 148, + 149, + true, + "truth", + "truth" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8106397678203715209, + 2241732074401283122, + null, + null, + 829, + 836, + 826, + 833, + 151, + 152, + true, + "answers", + "answers" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 8339342696999135929, + 196460050606710926, + null, + null, + 844, + 854, + 841, + 851, + 154, + 156, + true, + "KG queries", + "KG queries" + ], + [ + "expression", + "word-concatenation", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16502743316004277231, + 16326952748376776172, + null, + null, + 876, + 891, + 873, + 888, + 161, + 164, + true, + "well-controlled", + "well-controlled" + ], + [ + "expression", + "word-concatenation", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 5305843656069465459, + 342229054274366707, + null, + null, + 893, + 903, + 890, + 900, + 165, + 170, + true, + "end-to-end", + "end-to-end" + ], + [ + "term", + "single-term", + 1205649569241141618, + "TEXT", + "#/texts/133", + 1.0, + 16481284809726224751, + 2401167927031843510, + null, + null, + 900, + 922, + 897, + 919, + 169, + 172, + true, + "end accuracy benchmark", + "end accuracy benchmark" + ], + [ + "sentence", + "proper", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6264093697057942470, + 11535554290393356965, + null, + null, + 0, + 140, + 0, + 140, + 0, + 32, + true, + "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages.", + "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages." + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625741100019, + 12273254624188070437, + null, + null, + 4, + 8, + 4, + 8, + 1, + 2, + true, + "step", + "step" + ], + [ + "parenthesis", + "reference", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896395122, + 2035619839814699426, + null, + null, + 9, + 12, + 9, + 12, + 2, + 5, + true, + "(1)", + "(1)" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 17767354399704235161, + 10009347220024759156, + null, + null, + 10, + 11, + 10, + 11, + 3, + 4, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 14814125852840540191, + 1277876793308303433, + null, + null, + 20, + 28, + 20, + 28, + 7, + 8, + true, + "pipeline", + "pipeline" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625536078676, + 11597977105526404591, + null, + null, + 46, + 50, + 46, + 50, + 12, + 13, + true, + "1051", + "1051" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625526197745, + 11614011427733790335, + null, + null, + 51, + 55, + 51, + 55, + 13, + 14, + true, + "PDFs", + "PDFs" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896221596, + 2034623361247365679, + null, + null, + 61, + 64, + 61, + 64, + 15, + 16, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 86072878302920231, + 17579708504691528419, + null, + null, + 92, + 110, + 92, + 110, + 20, + 22, + true, + "document structure", + "document structure" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896435064, + 2035594838057841276, + null, + null, + 114, + 117, + 114, + 117, + 23, + 24, + true, + "300", + "300" + ], + [ + "parenthesis", + "round brackets", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 11766986595381952604, + 6865818640579361072, + null, + null, + 118, + 133, + 118, + 133, + 24, + 30, + true, + "(out of 46 019)", + "(out of 46 019)" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 15441160910541486270, + 17171794145981856951, + null, + null, + 126, + 128, + 126, + 128, + 27, + 28, + true, + "46", + "46" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12178341415896430817, + 2035594491968753454, + null, + null, + 129, + 132, + 129, + 132, + 28, + 29, + true, + "019", + "019" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 329104161667992688, + 12056718547458487792, + null, + null, + 134, + 139, + 134, + 139, + 30, + 31, + true, + "pages", + "pages" + ], + [ + "sentence", + "proper", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 5193352564005743212, + 15434324392582832918, + null, + null, + 141, + 290, + 141, + 290, + 32, + 63, + true, + "This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure.", + "This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure." + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 13968810273524073925, + 1064748835305933718, + null, + null, + 156, + 166, + 156, + 166, + 35, + 37, + true, + "page model", + "page model" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6167933651658664291, + 10817420032196180216, + null, + null, + 198, + 207, + 198, + 207, + 41, + 42, + true, + "documents", + "documents" + ], + [ + "expression", + "wtoken-concatenation", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 329104147618004591, + 11574052002680847144, + null, + null, + 230, + 235, + 230, + 235, + 47, + 51, + true, + "99.7%", + "99.7%" + ], + [ + "numval", + "fval", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625534532312, + 11597792617376893235, + null, + null, + 230, + 234, + 230, + 234, + 47, + 50, + true, + "99.7", + "99.7" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 14654259136923279336, + 1953334373968309661, + null, + null, + 234, + 242, + 234, + 242, + 50, + 52, + true, + "% recall", + "% recall" + ], + [ + "expression", + "wtoken-concatenation", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 329104147617972580, + 11574098081966624121, + null, + null, + 247, + 252, + 247, + 252, + 53, + 57, + true, + "99.3%", + "99.3%" + ], + [ + "numval", + "fval", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625534532316, + 11597792631633065669, + null, + null, + 247, + 251, + 247, + 251, + 53, + 56, + true, + "99.3", + "99.3" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 17644067776568466726, + 18060090887622708169, + null, + null, + 251, + 262, + 251, + 262, + 56, + 58, + true, + "% precision", + "% precision" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 12014656692274133724, + 13617999399581485717, + null, + null, + 270, + 289, + 270, + 289, + 60, + 62, + true, + "converted structure", + "converted structure" + ], + [ + "sentence", + "proper", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6552107417335024489, + 5237260083709821013, + null, + null, + 291, + 359, + 291, + 359, + 63, + 76, + true, + "These numbers are in line with those reported in our previous works.", + "These numbers are in line with those reported in our previous works." + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 8106352625329644634, + 6390816065172833166, + null, + null, + 297, + 304, + 297, + 304, + 64, + 65, + true, + "numbers", + "numbers" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 389609625633316261, + 12301982887694167440, + null, + null, + 312, + 316, + 312, + 316, + 67, + 68, + true, + "line", + "line" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 270007398742696754, + 7824217601819143418, + null, + null, + 344, + 358, + 344, + 358, + 73, + 75, + true, + "previous works", + "previous works" + ], + [ + "numval", + "ival", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 17767354399704235161, + 10009347220024810595, + null, + null, + 360, + 361, + 360, + 361, + 76, + 77, + true, + "1", + "1" + ], + [ + "sentence", + "proper", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 8820460630403895169, + 17593037612846824792, + null, + null, + 362, + 569, + 362, + 569, + 77, + 109, + true, + "Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", + "Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer." + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 5715923267297430455, + 797212159439962662, + null, + null, + 380, + 407, + 380, + 407, + 80, + 83, + true, + "accurate conversion results", + "accurate conversion results" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 8106477781724488761, + 10618946517388681676, + null, + null, + 433, + 440, + 433, + 440, + 88, + 89, + true, + "quality", + "quality" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 2136116818459714255, + 1164324304037097344, + null, + null, + 462, + 481, + 462, + 481, + 93, + 95, + true, + "language annotators", + "language annotators" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 2655104503757432456, + 6956823932481722640, + null, + null, + 495, + 510, + 495, + 510, + 97, + 99, + true, + "incomplete data", + "incomplete data" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 6165970819764784401, + 16497601699560813235, + null, + null, + 530, + 539, + 530, + 539, + 102, + 103, + true, + "relevance", + "relevance" + ], + [ + "term", + "single-term", + 12257840490666828590, + "TEXT", + "#/texts/134", + 1.0, + 16172227578405589462, + 15021035824619292483, + null, + null, + 543, + 556, + 543, + 556, + 104, + 106, + true, + "query results", + "query results" + ], + [ + "sentence", + "proper", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 18115937449900154415, + 5770214175319405773, + null, + null, + 0, + 146, + 0, + 146, + 0, + 29, + true, + "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain.", + "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain." + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625741100019, + 9977938209978319206, + null, + null, + 3, + 7, + 3, + 7, + 1, + 2, + true, + "step", + "step" + ], + [ + "parenthesis", + "reference", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896395187, + 3013785139620202598, + null, + null, + 8, + 11, + 8, + 11, + 2, + 5, + true, + "(2)", + "(2)" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 17767354399704235162, + 9584179333675572235, + null, + null, + 9, + 10, + 9, + 10, + 3, + 4, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 5877539623435777295, + 7936713845012512320, + null, + null, + 27, + 42, + 27, + 42, + 9, + 11, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541480770, + 17325109556329758529, + null, + null, + 58, + 60, + 58, + 60, + 14, + 15, + true, + "DF", + "DF" + ], + [ + "term", + "enum-term-mark-3", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 13335488353876392384, + 7462007142456652917, + null, + null, + 88, + 114, + 88, + 114, + 20, + 23, + true, + "entities and relationships", + "entities and relationships" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14652256560445338257, + 1416445963682787535, + null, + null, + 88, + 96, + 88, + 96, + 20, + 21, + true, + "entities", + "entities" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 8279380567349713241, + 12542603645636875495, + null, + null, + 101, + 114, + 101, + 114, + 22, + 23, + true, + "relationships", + "relationships" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 17565352035883069521, + 7876518838855380190, + null, + null, + 131, + 145, + 131, + 145, + 26, + 28, + true, + "geology domain", + "geology domain" + ], + [ + "sentence", + "proper", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 371194023314892743, + 8157120343645603021, + null, + null, + 147, + 362, + 147, + 362, + 29, + 74, + true, + "Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir).", + "Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir)." + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 11168880613841244046, + 3738855882894420837, + null, + null, + 151, + 176, + 151, + 176, + 30, + 33, + true, + "language annotator models", + "language annotator models" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 5929451728855710538, + 8499314494626515114, + null, + null, + 189, + 221, + 189, + 221, + 35, + 39, + true, + "geology extract geographic areas", + "geology extract geographic areas" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15928367849318151150, + 16059037534864104668, + null, + null, + 223, + 244, + 223, + 244, + 40, + 42, + true, + "geological structures", + "geological structures" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 8493668881347689613, + 1830285721893425217, + null, + null, + 245, + 257, + 245, + 257, + 42, + 47, + true, + "(eg, basins)", + "(eg, basins)" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541487324, + 17325105369339186228, + null, + null, + 246, + 248, + 246, + 248, + 43, + 44, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16381206570221872041, + 7952037600581639736, + null, + null, + 250, + 256, + 250, + 256, + 45, + 46, + true, + "basins", + "basins" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16064217528453934834, + 8232308350310476871, + null, + null, + 259, + 269, + 259, + 269, + 48, + 49, + true, + "formations", + "formations" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625700777197, + 9978557737437573440, + null, + null, + 271, + 275, + 271, + 275, + 50, + 51, + true, + "ages", + "ages" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104161637315394, + 10899605415371114622, + null, + null, + 277, + 282, + 277, + 282, + 52, + 53, + true, + "rocks", + "rocks" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 1727412062449779824, + 8798729288605233062, + null, + null, + 284, + 301, + 284, + 301, + 54, + 56, + true, + "petroleum systems", + "petroleum systems" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14652262331391540004, + 4323520615039764616, + null, + null, + 313, + 321, + 313, + 321, + 59, + 60, + true, + "elements", + "elements" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104053344678624, + 11470830176304757031, + null, + null, + 322, + 327, + 322, + 327, + 60, + 63, + true, + "(PSE)", + "(PSE)" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896290846, + 3013759111972227093, + null, + null, + 323, + 326, + 323, + 326, + 61, + 62, + true, + "PSE", + "PSE" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 7354314755257879414, + 2977258323293868752, + null, + null, + 328, + 361, + 328, + 361, + 63, + 73, + true, + "(eg, seal, source, and reservoir)", + "(eg, seal, source, and reservoir)" + ], + [ + "term", + "enum-term-mark-2", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 10684476924193845757, + 17883303725652713608, + null, + null, + 329, + 360, + 329, + 360, + 64, + 72, + true, + "eg, seal, source, and reservoir", + "eg, seal, source, and reservoir" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541487324, + 17325105369339181085, + null, + null, + 329, + 331, + 329, + 331, + 64, + 65, + true, + "eg", + "eg" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625741054314, + 9977936215248775844, + null, + null, + 333, + 337, + 333, + 337, + 66, + 67, + true, + "seal", + "seal" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16381206579112188113, + 8408987821758031825, + null, + null, + 339, + 345, + 339, + 345, + 68, + 69, + true, + "source", + "source" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 6168331670275357579, + 3530228493066473595, + null, + null, + 351, + 360, + 351, + 360, + 71, + 72, + true, + "reservoir", + "reservoir" + ], + [ + "sentence", + "proper", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 60590952645855625, + 9133748810633132730, + null, + null, + 363, + 486, + 363, + 486, + 74, + 102, + true, + "Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs).", + "Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs)." + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104159242619871, + 10528040562907345525, + null, + null, + 387, + 392, + 387, + 392, + 79, + 80, + true, + "total", + "total" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625655454200, + 9968482244883150940, + null, + null, + 396, + 400, + 396, + 400, + 81, + 82, + true, + "4597", + "4597" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625526136278, + 9992496440036513980, + null, + null, + 401, + 405, + 401, + 405, + 82, + 83, + true, + "PSEs", + "PSEs" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625533565630, + 9993658546277119180, + null, + null, + 407, + 411, + 407, + 411, + 84, + 85, + true, + "8811", + "8811" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16064217528453934834, + 8232308350310450335, + null, + null, + 412, + 422, + 412, + 422, + 85, + 86, + true, + "formations", + "formations" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896307158, + 3013762769356241010, + null, + null, + 424, + 427, + 424, + 427, + 87, + 88, + true, + "471", + "471" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 9663226904190425014, + 17830790977056937497, + null, + null, + 428, + 443, + 428, + 443, + 88, + 90, + true, + "geological ages", + "geological ages" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541481167, + 17325109575647682885, + null, + null, + 449, + 451, + 449, + 451, + 92, + 93, + true, + "64", + "64" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15981982758112403734, + 12205103080513700946, + null, + null, + 452, + 462, + 452, + 462, + 93, + 95, + true, + "rock types", + "rock types" + ], + [ + "parenthesis", + "round brackets", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 18038023841784269746, + 5076911958286528785, + null, + null, + 463, + 485, + 463, + 485, + 95, + 101, + true, + "(relevant to the PSEs)", + "(relevant to the PSEs)" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625526136278, + 9992496440035043113, + null, + null, + 480, + 484, + 480, + 484, + 99, + 100, + true, + "PSEs", + "PSEs" + ], + [ + "sentence", + "proper", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14434924169405353907, + 4188615434259114638, + null, + null, + 487, + 630, + 487, + 630, + 102, + 131, + true, + "The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores.", + "The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores." + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16555760578902726317, + 12858294934694547868, + null, + null, + 491, + 506, + 491, + 506, + 103, + 105, + true, + "full processing", + "full processing" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 4795376748618017018, + 1519274522679485529, + null, + null, + 523, + 535, + 523, + 535, + 108, + 110, + true, + "average rate", + "average rate" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896424078, + 3013760380687037623, + null, + null, + 539, + 542, + 539, + 542, + 111, + 112, + true, + "130", + "130" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541486786, + 17325105373990583372, + null, + null, + 543, + 545, + 543, + 545, + 112, + 113, + true, + "ms", + "ms" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 389609625632301461, + 9968221116647419565, + null, + null, + 550, + 554, + 550, + 554, + 114, + 115, + true, + "page", + "page" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 9601975787076761252, + 1652016010664406025, + null, + null, + 559, + 570, + 559, + 570, + 116, + 118, + true, + "worker core", + "worker core" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 16381206550376895780, + 7682460348962189650, + null, + null, + 577, + 583, + 577, + 583, + 121, + 122, + true, + "system", + "system" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12400507963759742880, + 15180031475753541242, + null, + null, + 595, + 607, + 595, + 607, + 124, + 126, + true, + "worker nodes", + "worker nodes" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104161555640697, + 10904197285886473134, + null, + null, + 624, + 629, + 624, + 629, + 129, + 130, + true, + "cores", + "cores" + ], + [ + "sentence", + "proper", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 14947571950312043839, + 14882716054638140801, + null, + null, + 631, + 698, + 631, + 698, + 131, + 144, + true, + "Eventually, the KG included 679 296 edges connecting 116 662 nodes.", + "Eventually, the KG included 679 296 edges connecting 116 662 nodes." + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 15441160910541480204, + 17325109606585833008, + null, + null, + 647, + 649, + 647, + 649, + 134, + 135, + true, + "KG", + "KG" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896199548, + 3013765380002608726, + null, + null, + 659, + 662, + 659, + 662, + 136, + 137, + true, + "679", + "679" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896436418, + 3013773685141369379, + null, + null, + 663, + 666, + 663, + 666, + 137, + 138, + true, + "296", + "296" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104162186494203, + 10532267587085008644, + null, + null, + 667, + 672, + 667, + 672, + 138, + 139, + true, + "edges", + "edges" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896426647, + 3013760979502921720, + null, + null, + 684, + 687, + 684, + 687, + 140, + 141, + true, + "116", + "116" + ], + [ + "numval", + "ival", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 12178341415896199474, + 3013765420737865825, + null, + null, + 688, + 691, + 688, + 691, + 141, + 142, + true, + "662", + "662" + ], + [ + "term", + "single-term", + 7040847965650746591, + "TEXT", + "#/texts/135", + 1.0, + 329104161758737773, + 10900831603847294449, + null, + null, + 692, + 697, + 692, + 697, + 142, + 143, + true, + "nodes", + "nodes" + ], + [ + "sentence", + "proper", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 997268821951707686, + 14496834891993701721, + null, + null, + 0, + 79, + 0, + 79, + 0, + 17, + true, + "In step (3), we query the Knowledge Graph using a tailored evaluation workflow.", + "In step (3), we query the Knowledge Graph using a tailored evaluation workflow." + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 389609625741100019, + 9811489708275752315, + null, + null, + 3, + 7, + 3, + 7, + 1, + 2, + true, + "step", + "step" + ], + [ + "parenthesis", + "reference", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 12178341415896394992, + 6455622738827563926, + null, + null, + 8, + 11, + 8, + 11, + 2, + 5, + true, + "(3)", + "(3)" + ], + [ + "numval", + "ival", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 17767354399704235163, + 96563067760012599, + null, + null, + 9, + 10, + 9, + 10, + 3, + 4, + true, + "3", + "3" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 5877539623435777295, + 1548140198302342719, + null, + null, + 26, + 41, + 26, + 41, + 9, + 11, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 11745701326970380985, + 14524400766422166580, + null, + null, + 59, + 78, + 59, + 78, + 14, + 16, + true, + "evaluation workflow", + "evaluation workflow" + ], + [ + "sentence", + "improper", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 460808421414733701, + 12585670206319025150, + null, + null, + 80, + 218, + 80, + 218, + 17, + 42, + true, + "This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", + "This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 14638857990842534974, + 11824267461300128868, + null, + null, + 85, + 93, + 85, + 93, + 18, + 19, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 389609625526136278, + 10206463490393311472, + null, + null, + 116, + 120, + 116, + 120, + 23, + 24, + true, + "PSEs", + "PSEs" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 14088628410271132453, + 16581151048247701778, + null, + null, + 141, + 151, + 141, + 151, + 27, + 28, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 5877539623435777295, + 1548140198302432862, + null, + null, + 159, + 174, + 159, + 174, + 30, + 32, + true, + "Knowledge Graph", + "Knowledge Graph" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 8106397496085150773, + 2627697033995097526, + null, + null, + 180, + 187, + 180, + 187, + 34, + 35, + true, + "example", + "example" + ], + [ + "term", + "enum-term-mark-2", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 6764907971677770258, + 6963840925279480990, + null, + null, + 195, + 218, + 195, + 218, + 37, + 42, + true, + "age, formation and rock", + "age, formation and rock" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 12178341415895571749, + 6455357909209920985, + null, + null, + 195, + 198, + 195, + 198, + 37, + 38, + true, + "age", + "age" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 6187533480885532545, + 3663640608662331706, + null, + null, + 200, + 209, + 200, + 209, + 39, + 40, + true, + "formation", + "formation" + ], + [ + "term", + "single-term", + 7927601225025519287, + "TEXT", + "#/texts/136", + 1.0, + 389609625632802170, + 10177362053775881094, + null, + null, + 214, + 218, + 214, + 218, + 41, + 42, + true, + "rock", + "rock" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/137", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 1080447728722590402, + "TEXT", + "#/texts/138", + 1.0, + 15441160910541481977, + 12490742773547210041, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "13", + "13" + ], + [ + "sentence", + "improper", + 4361549257087816853, + "TEXT", + "#/texts/139", + 1.0, + 15441160910541485670, + 9983842722140753537, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "of", + "of" + ], + [ + "numval", + "ival", + 4361549257087816853, + "TEXT", + "#/texts/139", + 1.0, + 15441160910541481979, + 9983816787922721487, + null, + null, + 3, + 5, + 3, + 5, + 1, + 2, + true, + "15", + "15" + ], + [ + "sentence", + "improper", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 4575797946527946612, + 5297035185336180529, + null, + null, + 0, + 12, + 0, + 12, + 0, + 2, + true, + "composition.", + "composition." + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14749101077007455096, + 13375337667618460743, + null, + null, + 0, + 11, + 0, + 11, + 0, + 1, + true, + "composition", + "composition" + ], + [ + "sentence", + "proper", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 1519458104665017357, + 10083376948813375189, + null, + null, + 13, + 64, + 13, + 64, + 2, + 14, + true, + "In Figure 7, we visualize the DAG of this workflow.", + "In Figure 7, we visualize the DAG of this workflow." + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 16381206514091025767, + 977586802525207516, + null, + null, + 16, + 22, + 16, + 22, + 3, + 4, + true, + "Figure", + "Figure" + ], + [ + "numval", + "ival", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 17767354399704235159, + 15458436803011088578, + null, + null, + 23, + 24, + 23, + 24, + 4, + 5, + true, + "7", + "7" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 12178341415896112046, + 5461004591321450263, + null, + null, + 43, + 46, + 43, + 46, + 9, + 10, + true, + "DAG", + "DAG" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14638857990842534974, + 11234311347164808960, + null, + null, + 55, + 63, + 55, + 63, + 12, + 13, + true, + "workflow", + "workflow" + ], + [ + "sentence", + "proper", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 875194381256283721, + 17161126138395688234, + null, + null, + 65, + 191, + 65, + 191, + 14, + 35, + true, + "The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", + "The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node." + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 2709262247996496944, + 7256068078148418519, + null, + null, + 69, + 87, + 69, + 87, + 15, + 18, + true, + "final node weights", + "final node weights" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14652253554560347064, + 10490266172391457967, + null, + null, + 119, + 127, + 119, + 127, + 22, + 23, + true, + "branches", + "branches" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14638857990842534974, + 11234311347164820372, + null, + null, + 135, + 143, + 135, + 143, + 25, + 26, + true, + "workflow", + "workflow" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 14475039354487345031, + 12703529367274285661, + null, + null, + 162, + 177, + 162, + 177, + 29, + 31, + true, + "relevance score", + "relevance score" + ], + [ + "term", + "single-term", + 8207961846673301043, + "TEXT", + "#/texts/140", + 1.0, + 389609625621164460, + 10220904674049331646, + null, + null, + 186, + 190, + 186, + 190, + 33, + 34, + true, + "node", + "node" + ], + [ + "sentence", + "proper", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 4387565148892077336, + 10851632580506485696, + null, + null, + 0, + 125, + 0, + 125, + 0, + 23, + true, + "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy.", + "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy." + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 2993400436143573854, + 495563557915533550, + null, + null, + 16, + 27, + 16, + 27, + 3, + 4, + true, + "correctness", + "correctness" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 5371881787938650225, + 1603959675351734932, + null, + null, + 45, + 59, + 45, + 59, + 7, + 9, + true, + "PSE properties", + "PSE properties" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 388046855546136742, + 9120352735722685290, + null, + null, + 75, + 92, + 75, + 92, + 13, + 15, + true, + "standard practice", + "standard practice" + ], + [ + "expression", + "word-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678051, + 18090963244175576847, + null, + null, + 110, + 115, + 110, + 115, + 18, + 21, + true, + "top-k", + "top-k" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 12178341415895527965, + 3846485752486964713, + null, + null, + 110, + 113, + 110, + 113, + 18, + 19, + true, + "top", + "top" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16090719762865250829, + 17746369558050044107, + null, + null, + 114, + 124, + 114, + 124, + 20, + 22, + true, + "k accuracy", + "k accuracy" + ], + [ + "sentence", + "proper", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16884271044112956615, + 14180978077955603249, + null, + null, + 126, + 254, + 126, + 254, + 23, + 47, + true, + "This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents.", + "This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents." + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 13928971162448274670, + 12605656053258808723, + null, + null, + 150, + 160, + 150, + 160, + 28, + 29, + true, + "percentage", + "percentage" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 8106397678203715209, + 10309807798569118015, + null, + null, + 198, + 205, + 198, + 205, + 37, + 38, + true, + "answers", + "answers" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206574646599727, + 10453069960511565431, + null, + null, + 227, + 233, + 227, + 233, + 41, + 42, + true, + "answer", + "answer" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 6167933651658664291, + 18428108737827032217, + null, + null, + 244, + 253, + 244, + 253, + 45, + 46, + true, + "documents", + "documents" + ], + [ + "sentence", + "proper", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 7112523875671286026, + 15753540610980998668, + null, + null, + 255, + 371, + 255, + 371, + 47, + 80, + true, + "In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element.", + "In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element." + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 17767354399704235161, + 17845175019612967856, + null, + null, + 264, + 265, + 264, + 265, + 49, + 50, + true, + "1", + "1" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678245, + 18090963127902817900, + null, + null, + 279, + 284, + 279, + 284, + 54, + 57, + true, + "top-1", + "top-1" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482672, + 15292900460193668121, + null, + null, + 282, + 284, + 282, + 284, + 55, + 57, + true, + "-1", + "-1" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678244, + 18090963127852540080, + null, + null, + 286, + 291, + 286, + 291, + 58, + 61, + true, + "top-2", + "top-2" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482673, + 15292900459317583926, + null, + null, + 289, + 291, + 289, + 291, + 59, + 61, + true, + "-2", + "-2" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678251, + 18090963018010454873, + null, + null, + 293, + 298, + 293, + 298, + 62, + 65, + true, + "top-3", + "top-3" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482674, + 15292900461018240016, + null, + null, + 296, + 298, + 296, + 298, + 63, + 65, + true, + "-3", + "-3" + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678249, + 18090963122544700654, + null, + null, + 304, + 309, + 304, + 309, + 67, + 70, + true, + "top-5", + "top-5" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482676, + 15292900461174373895, + null, + null, + 307, + 309, + 307, + 309, + 68, + 70, + true, + "-5", + "-5" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14650440612701450082, + 11645521409381699883, + null, + null, + 310, + 318, + 310, + 318, + 70, + 71, + true, + "accuracy", + "accuracy" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14088628410271132453, + 1572692648601073579, + null, + null, + 327, + 337, + 327, + 337, + 73, + 74, + true, + "properties", + "properties" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15085703780898398044, + 6927514159527930982, + null, + null, + 346, + 370, + 346, + 370, + 76, + 79, + true, + "petroleum system element", + "petroleum system element" + ], + [ + "sentence", + "proper", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 9700386374170371940, + 17921036849237798431, + null, + null, + 372, + 411, + 372, + 411, + 80, + 87, + true, + "One can make two distinct observations.", + "One can make two distinct observations." + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 9212537002518769220, + 3673646388873906846, + null, + null, + 389, + 410, + 389, + 410, + 84, + 86, + true, + "distinct observations", + "distinct observations" + ], + [ + "sentence", + "proper", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 1441259272582849102, + 16488672922824239869, + null, + null, + 412, + 567, + 412, + 567, + 87, + 129, + true, + "First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision).", + "First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision)." + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678245, + 18090963127902815000, + null, + null, + 423, + 428, + 423, + 428, + 90, + 93, + true, + "top-1", + "top-1" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482672, + 15292900460193644573, + null, + null, + 426, + 428, + 426, + 428, + 91, + 93, + true, + "-1", + "-1" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 8106352625329644634, + 13683368641090279135, + null, + null, + 429, + 436, + 429, + 436, + 93, + 94, + true, + "numbers", + "numbers" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104161634702433, + 2234794017392814741, + null, + null, + 448, + 453, + 448, + 453, + 97, + 98, + true, + "range", + "range" + ], + [ + "numval", + "fval", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14652250303396477617, + 6263954298368962822, + null, + null, + 457, + 465, + 457, + 465, + 99, + 106, + true, + "0.75-0.9", + "0.75-0.9" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 17767354399704235163, + 17845175019597634812, + null, + null, + 484, + 485, + 484, + 485, + 110, + 111, + true, + "3", + "3" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 17767354399704235156, + 17845175019331480896, + null, + null, + 489, + 490, + 489, + 490, + 112, + 113, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104161511786824, + 2268234006473983274, + null, + null, + 491, + 496, + 491, + 496, + 113, + 114, + true, + "cases", + "cases" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14476305150084091928, + 4901476961065610348, + null, + null, + 507, + 522, + 507, + 522, + 117, + 119, + true, + "relevant result", + "relevant result" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541480204, + 15292900207337913499, + null, + null, + 540, + 542, + 540, + 542, + 122, + 123, + true, + "KG", + "KG" + ], + [ + "parenthesis", + "round brackets", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 1151740806635216288, + 9949844321651855821, + null, + null, + 555, + 566, + 555, + 566, + 125, + 128, + true, + "(precision)", + "(precision)" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 6184954595655792282, + 10326926050568403160, + null, + null, + 556, + 565, + 556, + 565, + 126, + 127, + true, + "precision", + "precision" + ], + [ + "sentence", + "proper", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 6488415355581121222, + 4997443328230932797, + null, + null, + 568, + 739, + 568, + 737, + 129, + 169, + true, + "Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall).", + "Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall)." + ], + [ + "expression", + "wtoken-concatenation", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 329104159242678249, + 18090963122544576128, + null, + null, + 598, + 603, + 598, + 603, + 135, + 138, + true, + "top-5", + "top-5" + ], + [ + "numval", + "ival", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 15441160910541482676, + 15292900461174286862, + null, + null, + 601, + 603, + 601, + 603, + 136, + 138, + true, + "-5", + "-5" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 8106352625329644634, + 13683368641090042611, + null, + null, + 604, + 611, + 604, + 611, + 138, + 139, + true, + "numbers", + "numbers" + ], + [ + "parenthesis", + "round brackets", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14824366717978656546, + 12784736972371149059, + null, + null, + 626, + 636, + 626, + 634, + 142, + 148, + true, + "(\u2265 0.97)", + "(\u2265 0.97)" + ], + [ + "numval", + "fval", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 389609625535995626, + 11162238664629223042, + null, + null, + 631, + 635, + 629, + 633, + 144, + 147, + true, + "0.97", + "0.97" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206550376895780, + 15564683093048068331, + null, + null, + 655, + 661, + 653, + 659, + 152, + 153, + true, + "system", + "system" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 417457895991466544, + 13846422900098246222, + null, + null, + 666, + 677, + 664, + 675, + 154, + 156, + true, + "able detect", + "able detect" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 389609625526136278, + 11163534876152642859, + null, + null, + 704, + 708, + 702, + 706, + 161, + 162, + true, + "PSEs", + "PSEs" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14088628410271132453, + 1572692648597504797, + null, + null, + 719, + 729, + 717, + 727, + 164, + 165, + true, + "properties", + "properties" + ], + [ + "parenthesis", + "round brackets", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14654064136955905430, + 8140184277037689536, + null, + null, + 730, + 738, + 728, + 736, + 165, + 168, + true, + "(recall)", + "(recall)" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206521531485437, + 2410946269934605693, + null, + null, + 731, + 737, + 729, + 735, + 166, + 167, + true, + "recall", + "recall" + ], + [ + "sentence", + "proper", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 9883022576161827356, + 11048410564078668147, + null, + null, + 740, + 834, + 738, + 832, + 169, + 186, + true, + "Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", + "Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory." + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 16381206521531485437, + 2410946269934606663, + null, + null, + 750, + 756, + 748, + 754, + 172, + 173, + true, + "recall", + "recall" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 2136116818459714255, + 8001485684986399574, + null, + null, + 764, + 783, + 762, + 781, + 175, + 177, + true, + "language annotators", + "language annotators" + ], + [ + "term", + "single-term", + 11998199584890640594, + "TEXT", + "#/texts/141", + 1.0, + 14857819661511796263, + 10963692960997527590, + null, + null, + 791, + 811, + 789, + 809, + 179, + 182, + true, + "KG creation pipeline", + "KG creation pipeline" + ], + [ + "numval", + "ival", + 16446129547721407877, + "TEXT", + "#/texts/142", + 1.0, + 17767354399704235158, + 11362596522813034737, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "6", + "6" + ], + [ + "sentence", + "improper", + 16446129547721407877, + "TEXT", + "#/texts/142", + 1.0, + 16842535493722576894, + 9070661535139415199, + null, + null, + 2, + 15, + 2, + 15, + 1, + 3, + true, + "| CONCLUSIONS", + "| CONCLUSIONS" + ], + [ + "term", + "single-term", + 16446129547721407877, + "TEXT", + "#/texts/142", + 1.0, + 4494148153097800926, + 5377935386843765038, + null, + null, + 4, + 15, + 4, + 15, + 2, + 3, + true, + "CONCLUSIONS", + "CONCLUSIONS" + ], + [ + "sentence", + "proper", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 16415944396843588595, + 17709466530502406570, + null, + null, + 0, + 221, + 0, + 221, + 0, + 35, + true, + "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution.", + "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution." + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 597480423109041411, + 10291295016493852070, + null, + null, + 9, + 21, + 9, + 21, + 2, + 3, + true, + "introduction", + "introduction" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12779036928191531604, + 9594818965137662456, + null, + null, + 29, + 41, + 29, + 41, + 5, + 7, + true, + "CPS platform", + "CPS platform" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 13236236219972254996, + 10365732013549486229, + null, + null, + 58, + 77, + 58, + 77, + 10, + 12, + true, + "substantial benefit", + "substantial benefit" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 4156286750856532243, + 5197485465821099672, + null, + null, + 82, + 96, + 82, + 96, + 13, + 15, + true, + "domain experts", + "domain experts" + ], + [ + "term", + "enum-term-mark-3", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 15152392191731287429, + 5208425395920976424, + null, + null, + 89, + 116, + 89, + 116, + 14, + 18, + true, + "experts and data scientists", + "experts and data scientists" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6736565927644210758, + 5282440466057373776, + null, + null, + 101, + 116, + 101, + 116, + 16, + 18, + true, + "data scientists", + "data scientists" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 8856108142217449705, + 18116497161907130755, + null, + null, + 131, + 147, + 131, + 147, + 20, + 22, + true, + "deep exploration", + "deep exploration" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6184122545182835014, + 15596336968217381296, + null, + null, + 161, + 170, + 161, + 170, + 24, + 25, + true, + "knowledge", + "knowledge" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 18397948429710667913, + 14377408563795531173, + null, + null, + 198, + 220, + 198, + 220, + 31, + 34, + true, + "modular cloud solution", + "modular cloud solution" + ], + [ + "sentence", + "proper", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 337455657200974030, + 8226213723352014521, + null, + null, + 222, + 449, + 222, + 449, + 35, + 72, + true, + "CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics.", + "CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics." + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896222428, + 5534090683430116629, + null, + null, + 222, + 225, + 222, + 225, + 35, + 36, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896222616, + 5534090651231733521, + null, + null, + 253, + 256, + 253, + 256, + 40, + 41, + true, + "CSS", + "CSS" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 2924827465470055507, + 17840272590051434356, + null, + null, + 299, + 317, + 299, + 317, + 49, + 51, + true, + "automated pipeline", + "automated pipeline" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 5640550190838251117, + 9242927206023493986, + null, + null, + 327, + 361, + 327, + 361, + 53, + 57, + true, + "consistent domain knowledge models", + "consistent domain knowledge models" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 16567127297746127145, + 14122132328015379420, + null, + null, + 380, + 397, + 380, + 397, + 61, + 63, + true, + "powerful approach", + "powerful approach" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 13481069231801630849, + 3957307499739387254, + null, + null, + 401, + 422, + 401, + 422, + 64, + 66, + true, + "explorational queries", + "explorational queries" + ], + [ + "expression", + "word-concatenation", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 1053045968880146688, + 12583633977815123246, + null, + null, + 427, + 438, + 427, + 438, + 67, + 70, + true, + "graph-scale", + "graph-scale" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 329104159211247965, + 5385052269660054940, + null, + null, + 427, + 432, + 427, + 432, + 67, + 68, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 1984147173260580992, + 12482101824871936564, + null, + null, + 433, + 448, + 433, + 448, + 69, + 71, + true, + "scale analytics", + "scale analytics" + ], + [ + "sentence", + "improper", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6626158461069440872, + 1924856031834324320, + null, + null, + 450, + 699, + 450, + 699, + 72, + 117, + true, + "This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", + "This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 14155725816972569762, + 3313864890251993459, + null, + null, + 485, + 518, + 485, + 518, + 77, + 80, + true, + "fundamental design considerations", + "fundamental design considerations" + ], + [ + "parenthesis", + "reference", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896395122, + 5534148918627002152, + null, + null, + 520, + 523, + 520, + 523, + 81, + 84, + true, + "(1)", + "(1)" + ], + [ + "numval", + "ival", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 17767354399704235161, + 16606870843966802051, + null, + null, + 521, + 522, + 521, + 522, + 82, + 83, + true, + "1", + "1" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 3841439629787535208, + 4030642280425566118, + null, + null, + 542, + 562, + 542, + 562, + 88, + 91, + true, + "manual data curation", + "manual data curation" + ], + [ + "term", + "enum-term-mark-2", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 16305600260127055870, + 8195345917782449202, + null, + null, + 554, + 576, + 554, + 576, + 90, + 93, + true, + "curation or annotation", + "curation or annotation" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 15359807916847495711, + 16353024261898901635, + null, + null, + 566, + 576, + 566, + 576, + 92, + 93, + true, + "annotation", + "annotation" + ], + [ + "parenthesis", + "reference", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 12178341415896395187, + 5534148917561236863, + null, + null, + 578, + 581, + 578, + 581, + 94, + 97, + true, + "(2)", + "(2)" + ], + [ + "numval", + "ival", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 17767354399704235162, + 16606870838110795262, + null, + null, + 579, + 580, + 579, + 580, + 95, + 96, + true, + "2", + "2" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 4430075463994275386, + 1531064733678072666, + null, + null, + 603, + 625, + 603, + 625, + 102, + 104, + true, + "efficient architecture", + "efficient architecture" + ], + [ + "term", + "enum-term-mark-2", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 7917122769361737138, + 17010355979462668621, + null, + null, + 641, + 672, + 641, + 672, + 107, + 112, + true, + "ingestion, processing and query", + "ingestion, processing and query" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 6182654480499682241, + 10279250109304112765, + null, + null, + 641, + 650, + 641, + 650, + 107, + 108, + true, + "ingestion", + "ingestion" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 14088627147213114570, + 14322058830662955649, + null, + null, + 652, + 662, + 652, + 662, + 109, + 110, + true, + "processing", + "processing" + ], + [ + "term", + "single-term", + 6720443978031524294, + "TEXT", + "#/texts/143", + 1.0, + 15144348319402645349, + 2995094926762721744, + null, + null, + 667, + 682, + 667, + 682, + 111, + 113, + true, + "query workloads", + "query workloads" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/144", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "sentence", + "improper", + 2144926730621142072, + "TEXT", + "#/texts/145", + 1.0, + 16380805732317250115, + 5189702932560370903, + null, + null, + 0, + 6, + 0, + 6, + 0, + 3, + true, + "14of15", + "14of15" + ], + [ + "reference", + "reference-number", + 2144926730621142072, + "TEXT", + "#/texts/145", + 1.0, + 16380805732317250115, + 5189702932560370903, + null, + null, + 0, + 6, + 0, + 6, + 0, + 3, + true, + "14of15", + "14of15" + ], + [ + "sentence", + "improper", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 8274346334061681675, + 2314658471919352980, + null, + null, + 0, + 22, + 0, + 22, + 0, + 5, + true, + "a single platform; and", + "a single platform; and" + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 17809956737872564404, + 98872371406955147, + null, + null, + 2, + 17, + 2, + 17, + 1, + 3, + true, + "single platform", + "single platform" + ], + [ + "parenthesis", + "reference", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 12178341415896394992, + 13000428721171190822, + null, + null, + 23, + 26, + 23, + 26, + 5, + 8, + true, + "(3)", + "(3)" + ], + [ + "numval", + "ival", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 17767354399704235163, + 2699991593779864855, + null, + null, + 24, + 25, + 24, + 25, + 6, + 7, + true, + "3", + "3" + ], + [ + "sentence", + "proper", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 22046687723110617, + 16139393736301290316, + null, + null, + 27, + 119, + 27, + 119, + 8, + 22, + true, + "We expose the capabilities through an intuitively consumable API and complementary UI tools.", + "We expose the capabilities through an intuitively consumable API and complementary UI tools." + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 11892545746641362388, + 4268311181255501740, + null, + null, + 41, + 53, + 41, + 53, + 11, + 12, + true, + "capabilities", + "capabilities" + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 2165839110633348678, + 11437800686337083589, + null, + null, + 77, + 91, + 77, + 91, + 15, + 17, + true, + "consumable API", + "consumable API" + ], + [ + "term", + "single-term", + 14222671032550229818, + "TEXT", + "#/texts/146", + 1.0, + 17732874984494701283, + 1188095097533333023, + null, + null, + 96, + 118, + 96, + 118, + 18, + 21, + true, + "complementary UI tools", + "complementary UI tools" + ], + [ + "sentence", + "proper", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 7919487032278953085, + 6331733154504057997, + null, + null, + 0, + 157, + 0, + 157, + 0, + 31, + true, + "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team.", + "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team." + ], + [ + "term", + "enum-term-mark-2", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 3124601704379826877, + 4362564571906744374, + null, + null, + 7, + 29, + 7, + 29, + 2, + 7, + true, + "oil and gas case study", + "oil and gas case study" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415895623363, + 8268162173441645749, + null, + null, + 7, + 10, + 7, + 10, + 2, + 3, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 10318284848910968979, + 1295911181594562505, + null, + null, + 15, + 29, + 15, + 29, + 4, + 7, + true, + "gas case study", + "gas case study" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 14635106751859230946, + 13564055847176212531, + null, + null, + 60, + 68, + 60, + 68, + 12, + 13, + true, + "solution", + "solution" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 8973266897479869153, + 7123298173656310256, + null, + null, + 75, + 97, + 75, + 97, + 15, + 19, + true, + "real-world application", + "real-world application" + ], + [ + "expression", + "word-concatenation", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 15984801488078789848, + 8130345344279106999, + null, + null, + 75, + 85, + 75, + 85, + 15, + 18, + true, + "real-world", + "real-world" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 389609625695143886, + 6422601049208570234, + null, + null, + 107, + 111, + 107, + 111, + 21, + 22, + true, + "help", + "help" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 2532084510793506348, + 12340694780983444669, + null, + null, + 115, + 137, + 115, + 137, + 23, + 26, + true, + "subject matter experts", + "subject matter experts" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 2350671729723156275, + 15376321056085788759, + null, + null, + 145, + 156, + 145, + 156, + 28, + 30, + true, + "client team", + "client team" + ], + [ + "sentence", + "proper", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 13179783236811628643, + 10576686414353350640, + null, + null, + 158, + 322, + 158, + 322, + 31, + 63, + true, + "Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", + "Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry." + ], + [ + "term", + "enum-term-mark-4", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 2824345713217859749, + 9704847614342538686, + null, + null, + 169, + 180, + 169, + 180, + 33, + 36, + true, + "CCS and CPS", + "CCS and CPS" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415896221596, + 8268263923333827885, + null, + null, + 169, + 172, + 169, + 172, + 33, + 34, + true, + "CCS", + "CCS" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415896222428, + 8268263906205242250, + null, + null, + 177, + 180, + 177, + 180, + 35, + 36, + true, + "CPS", + "CPS" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 11025819392462273971, + 1247396544615538597, + null, + null, + 217, + 235, + 217, + 235, + 43, + 45, + true, + "client engagements", + "client engagements" + ], + [ + "term", + "enum-term-mark-2", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 848781837929279741, + 13179566797715975811, + null, + null, + 257, + 277, + 257, + 277, + 50, + 54, + true, + "oil and gas industry", + "oil and gas industry" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 12178341415895623363, + 8268162173441630103, + null, + null, + 257, + 260, + 257, + 260, + 50, + 51, + true, + "oil", + "oil" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 17613546823892249124, + 8946947737051436961, + null, + null, + 265, + 277, + 265, + 277, + 52, + 54, + true, + "gas industry", + "gas industry" + ], + [ + "term", + "single-term", + 17486770941839589126, + "TEXT", + "#/texts/147", + 1.0, + 1620835639831122355, + 4723302290987844432, + null, + null, + 296, + 321, + 296, + 321, + 59, + 62, + true, + "material science industry", + "material science industry" + ], + [ + "sentence", + "proper", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 4472913868502496196, + 2721699422055737565, + null, + null, + 0, + 172, + 0, + 172, + 0, + 32, + true, + "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", + "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration." + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 2018557288699233431, + 16975902875644112037, + null, + null, + 0, + 11, + 0, + 11, + 0, + 2, + true, + "Future work", + "Future work" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 13352531518695846369, + 12144575405222745087, + null, + null, + 37, + 56, + 37, + 56, + 6, + 8, + true, + "public repositories", + "public repositories" + ], + [ + "expression", + "wtoken-concatenation", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 5945163521127932196, + 8132279328728560937, + null, + null, + 69, + 78, + 69, + 78, + 11, + 14, + true, + "arXiv.org", + "arXiv.org" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 329104159177127695, + 18251677073609090860, + null, + null, + 69, + 74, + 69, + 74, + 11, + 12, + true, + "arXiv", + "arXiv" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 10655579017626383006, + 12341679994279818148, + null, + null, + 75, + 86, + 75, + 86, + 13, + 15, + true, + "org library", + "org library" + ], + [ + "term", + "enum-term-mark-4", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 15838174910682029174, + 3269528259737521863, + null, + null, + 88, + 105, + 88, + 105, + 16, + 20, + true, + "USPTO, and PubMed", + "USPTO, and PubMed" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 329104162018760499, + 18012444193898764026, + null, + null, + 88, + 93, + 88, + 93, + 16, + 17, + true, + "USPTO", + "USPTO" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 16381206483336886705, + 13596604403738397760, + null, + null, + 99, + 105, + 99, + 105, + 19, + 20, + true, + "PubMed", + "PubMed" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 329104161571401725, + 18386641959556324131, + null, + null, + 109, + 114, + 109, + 114, + 21, + 22, + true, + "order", + "order" + ], + [ + "term", + "single-term", + 16574813224778118841, + "TEXT", + "#/texts/148", + 1.0, + 13671659409933113155, + 9272823095563995053, + null, + null, + 150, + 171, + 150, + 171, + 28, + 31, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "sentence", + "improper", + 3356142343274371864, + "TEXT", + "#/texts/149", + 1.0, + 17772737780533561635, + 1151622547388974028, + null, + null, + 0, + 27, + 0, + 27, + 0, + 3, + true, + "DATA AVAILABILITY STATEMENT", + "DATA AVAILABILITY STATEMENT" + ], + [ + "term", + "single-term", + 3356142343274371864, + "TEXT", + "#/texts/149", + 1.0, + 17772737780533561635, + 1151622547388974028, + null, + null, + 0, + 27, + 0, + 27, + 0, + 3, + true, + "DATA AVAILABILITY STATEMENT", + "DATA AVAILABILITY STATEMENT" + ], + [ + "sentence", + "proper", + 4778022085288441371, + "TEXT", + "#/texts/150", + 1.0, + 11662592888764396578, + 14754781215187398204, + null, + null, + 0, + 41, + 0, + 41, + 0, + 7, + true, + "Data subject to third party restrictions.", + "Data subject to third party restrictions." + ], + [ + "term", + "single-term", + 4778022085288441371, + "TEXT", + "#/texts/150", + 1.0, + 389609625537659398, + 6127806615430218387, + null, + null, + 0, + 4, + 0, + 4, + 0, + 1, + true, + "Data", + "Data" + ], + [ + "term", + "single-term", + 4778022085288441371, + "TEXT", + "#/texts/150", + 1.0, + 7076010952609514944, + 4435271120668497918, + null, + null, + 16, + 40, + 16, + 40, + 3, + 6, + true, + "third party restrictions", + "third party restrictions" + ], + [ + "sentence", + "improper", + 4361549257598904601, + "TEXT", + "#/texts/151", + 1.0, + 329104162230294308, + 18235196107168082832, + null, + null, + 0, + 5, + 0, + 5, + 0, + 1, + true, + "ORCID", + "ORCID" + ], + [ + "sentence", + "improper", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 9234676532203821814, + 9496298137639648491, + null, + null, + 0, + 122, + 0, + 122, + 0, + 40, + true, + "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer", + "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer" + ], + [ + "name", + "person-name", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 4686361850733567621, + 8628324652592599079, + null, + null, + 0, + 17, + 0, + 17, + 0, + 6, + true, + "Peter W J Staar", + "Peter W. J. Staar" + ], + [ + "link", + "url", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 7086030415698247677, + 10516035679311822965, + null, + null, + 18, + 55, + 18, + 55, + 6, + 21, + true, + "https://orcid.org/0000-0002-8088-0823", + "https://orcid.org/0000-0002-8088-0823" + ], + [ + "expression", + "wtoken-concatenation", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 7086030415698247677, + 10516035679311822965, + null, + null, + 18, + 55, + 18, + 55, + 6, + 21, + true, + "https://orcid.org/0000-0002-8088-0823", + "https://orcid.org/0000-0002-8088-0823" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 10302035827600178331, + 6710097973531677104, + null, + null, + 36, + 45, + 36, + 45, + 14, + 17, + true, + "0000-0002", + "0000-0002" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 6624857390961351666, + 3541555616013892515, + null, + null, + 46, + 55, + 46, + 55, + 18, + 21, + true, + "8088-0823", + "8088-0823" + ], + [ + "term", + "single-term", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 1571808557594152175, + 14010767871411326211, + null, + null, + 56, + 69, + 56, + 69, + 21, + 23, + true, + "Michele Dolfi", + "Michele Dolfi" + ], + [ + "link", + "url", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 2033258390552333901, + 14596379607593903375, + null, + null, + 70, + 107, + 70, + 107, + 23, + 38, + true, + "https://orcid.org/0000-0001-7216-8505", + "https://orcid.org/0000-0001-7216-8505" + ], + [ + "expression", + "wtoken-concatenation", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 2033258390552333901, + 14596379607593903375, + null, + null, + 70, + 107, + 70, + 107, + 23, + 38, + true, + "https://orcid.org/0000-0001-7216-8505", + "https://orcid.org/0000-0001-7216-8505" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 10302035827600178332, + 6710097973532471075, + null, + null, + 88, + 97, + 88, + 97, + 31, + 34, + true, + "0000-0001", + "0000-0001" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 6560223242063427106, + 13609528576140932418, + null, + null, + 98, + 107, + 98, + 107, + 35, + 38, + true, + "7216-8505", + "7216-8505" + ], + [ + "term", + "single-term", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 9737597816447750448, + 4222775986855314534, + null, + null, + 108, + 122, + 108, + 122, + 38, + 40, + true, + "Christoph Auer", + "Christoph Auer" + ], + [ + "link", + "url", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 2031879929749239141, + 13323569836539834175, + null, + null, + 123, + 160, + 123, + 160, + 40, + 55, + true, + "https://orcid.org/0000-0001-5761-0422", + "https://orcid.org/0000-0001-5761-0422" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 10302035827600178332, + 6710097973532498930, + null, + null, + 141, + 150, + 141, + 150, + 48, + 51, + true, + "0000-0001", + "0000-0001" + ], + [ + "numval", + "irng", + 3523281823889115814, + "TEXT", + "#/texts/152", + 1.0, + 6573923715856392023, + 13497670743408223376, + null, + null, + 151, + 160, + 151, + 160, + 52, + 55, + true, + "5761-0422", + "5761-0422" + ], + [ + "sentence", + "improper", + 8500729849894221215, + "TEXT", + "#/texts/153", + 1.0, + 14650266124350583462, + 13656738482730710169, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "ENDNOTES", + "ENDNOTES" + ], + [ + "term", + "single-term", + 8500729849894221215, + "TEXT", + "#/texts/153", + 1.0, + 14650266124350583462, + 13656738482730710169, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "ENDNOTES", + "ENDNOTES" + ], + [ + "sentence", + "improper", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 17767354399704235138, + 12639988856153391105, + null, + null, + 0, + 1, + 0, + 1, + 0, + 1, + true, + "*", + "*" + ], + [ + "sentence", + "proper", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 15289232076819477879, + 7514275424619623119, + null, + null, + 2, + 99, + 2, + 99, + 1, + 30, + true, + "For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", + "For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org)." + ], + [ + "term", + "single-term", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 8106397496085150773, + 634835345710543557, + null, + null, + 6, + 13, + 6, + 13, + 2, + 3, + true, + "example", + "example" + ], + [ + "term", + "single-term", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 7002898201903728267, + 1737821260812359285, + null, + null, + 15, + 28, + 15, + 28, + 4, + 5, + true, + "ElasticSearch", + "ElasticSearch" + ], + [ + "parenthesis", + "round brackets", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 569129533218351355, + 3470387564381472056, + null, + null, + 29, + 53, + 29, + 53, + 5, + 16, + true, + "(https://www.elastic.co)", + "(https://www.elastic.co)" + ], + [ + "link", + "url", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 3527101060180289873, + 4288347075719597580, + null, + null, + 30, + 52, + 30, + 52, + 6, + 15, + true, + "https://www.elastic.co", + "https://www.elastic.co" + ], + [ + "link", + "url", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 7699234159584878934, + 8720273332387288393, + null, + null, + 38, + 52, + 38, + 52, + 10, + 15, + true, + "www.elastic.co", + "www.elastic.co" + ], + [ + "term", + "single-term", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 18329142643795090602, + 2655325726805406767, + null, + null, + 58, + 70, + 58, + 70, + 17, + 18, + true, + "ApacheLucene", + "ApacheLucene" + ], + [ + "parenthesis", + "round brackets", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 9861891912574044258, + 4499735700376823345, + null, + null, + 71, + 98, + 71, + 98, + 18, + 29, + true, + "(https://lucene.apache.org)", + "(https://lucene.apache.org)" + ], + [ + "link", + "url", + 7813503946963688644, + "TEXT", + "#/texts/154", + 1.0, + 7381438071617048818, + 3762754436696500331, + null, + null, + 72, + 97, + 72, + 97, + 19, + 28, + true, + "https://lucene.apache.org", + "https://lucene.apache.org" + ], + [ + "sentence", + "improper", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 17767354399704341640, + 1655277645618781842, + null, + null, + 0, + 3, + 0, + 1, + 0, + 1, + true, + "\u2020", + "\u2020" + ], + [ + "sentence", + "proper", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 12458532663664098281, + 15414412942250901023, + null, + null, + 4, + 160, + 2, + 158, + 1, + 27, + true, + "Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions.", + "Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions." + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 5234082820457819963, + 10637531498360814115, + null, + null, + 4, + 26, + 2, + 24, + 1, + 4, + true, + "Most language entities", + "Most language entities" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 6630151693041027733, + 5310121539758151013, + null, + null, + 34, + 49, + 32, + 47, + 6, + 8, + true, + "technical field", + "technical field" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 5273909445408112658, + 2278695577032735159, + null, + null, + 96, + 108, + 94, + 106, + 16, + 18, + true, + "rigorous way", + "rigorous way" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 17163002546996330472, + 3748678944934416450, + null, + null, + 140, + 159, + 138, + 157, + 24, + 26, + true, + "regular expressions", + "regular expressions" + ], + [ + "sentence", + "proper", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 931259114935419412, + 2966611005798001879, + null, + null, + 161, + 285, + 159, + 283, + 27, + 48, + true, + "We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", + "We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations." + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 14814125472896938138, + 13430040721706784836, + null, + null, + 178, + 186, + 176, + 184, + 31, + 32, + true, + "practice", + "practice" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 17163002546996330472, + 3748678944934419646, + null, + null, + 188, + 207, + 186, + 205, + 33, + 35, + true, + "regular expressions", + "regular expressions" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 6557955699305751580, + 14416158030891845149, + null, + null, + 225, + 234, + 223, + 232, + 37, + 39, + true, + "DL models", + "DL models" + ], + [ + "term", + "single-term", + 9230987401345399746, + "TEXT", + "#/texts/155", + 1.0, + 12118184688624410579, + 7523104278049565649, + null, + null, + 269, + 284, + 267, + 282, + 46, + 47, + true, + "representations", + "representations" + ], + [ + "sentence", + "improper", + 1997735398126013155, + "TEXT", + "#/texts/156", + 1.0, + 17767354399704341641, + 15453018270956350746, + null, + null, + 0, + 3, + 0, + 1, + 0, + 1, + true, + "\u2021", + "\u2021" + ], + [ + "link", + "url", + 1997735398126013155, + "TEXT", + "#/texts/156", + 1.0, + 11080755855567888942, + 12138756017738546093, + null, + null, + 4, + 24, + 2, + 22, + 1, + 10, + true, + "https://www.nltk.org", + "https://www.nltk.org" + ], + [ + "link", + "url", + 1997735398126013155, + "TEXT", + "#/texts/156", + 1.0, + 7030452472279930374, + 3139262024232962844, + null, + null, + 12, + 24, + 10, + 22, + 5, + 10, + true, + "www.nltk.org", + "www.nltk.org" + ], + [ + "sentence", + "improper", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 17767354399704232711, + 4203992233791646194, + null, + null, + 0, + 2, + 0, + 1, + 0, + 1, + true, + "\u00a7", + "\u00a7" + ], + [ + "sentence", + "proper", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 12149225629366182819, + 13287297407560091582, + null, + null, + 3, + 53, + 2, + 52, + 1, + 11, + true, + "We follow the standard JSON-schema for references.", + "We follow the standard JSON-schema for references." + ], + [ + "term", + "single-term", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 3677160120858730376, + 17054612376731143605, + null, + null, + 17, + 30, + 16, + 29, + 4, + 6, + true, + "standard JSON", + "standard JSON" + ], + [ + "expression", + "word-concatenation", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 11674671916710033839, + 13118355578687598339, + null, + null, + 26, + 37, + 25, + 36, + 5, + 8, + true, + "JSON-schema", + "JSON-schema" + ], + [ + "term", + "single-term", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 16381206579179442550, + 17448384136381969736, + null, + null, + 31, + 37, + 30, + 36, + 7, + 8, + true, + "schema", + "schema" + ], + [ + "term", + "single-term", + 13566764974477978642, + "TEXT", + "#/texts/157", + 1.0, + 15984565858548749625, + 721337063821589131, + null, + null, + 42, + 52, + 41, + 51, + 9, + 10, + true, + "references", + "references" + ], + [ + "sentence", + "improper", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 17767354399704232726, + 13902072770511598079, + null, + null, + 0, + 2, + 0, + 1, + 0, + 1, + true, + "\u00b6", + "\u00b6" + ], + [ + "sentence", + "proper", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 7380356609967428771, + 11039135432617650461, + null, + null, + 3, + 151, + 2, + 150, + 1, + 32, + true, + "A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1).", + "A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1)." + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 14238812658426593966, + 13924064151272705753, + null, + null, + 12, + 36, + 11, + 35, + 3, + 6, + true, + "simple similarity metric", + "simple similarity metric" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 5203229829211163848, + 10805139012141424660, + null, + null, + 53, + 69, + 52, + 68, + 10, + 12, + true, + "fuzzy comparison", + "fuzzy comparison" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 329104161568027276, + 7648688669911791224, + null, + null, + 77, + 82, + 76, + 81, + 14, + 15, + true, + "names", + "names" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 14652256560445338257, + 10525189337855255576, + null, + null, + 102, + 110, + 101, + 109, + 19, + 20, + true, + "entities", + "entities" + ], + [ + "parenthesis", + "round brackets", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 9828412089918712334, + 6215717056563871310, + null, + null, + 111, + 150, + 110, + 149, + 20, + 31, + true, + "(ie, the name field found in Listing 1)", + "(ie, the name field found in Listing 1)" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 15441160910541486545, + 16190146737237010835, + null, + null, + 112, + 114, + 111, + 113, + 21, + 22, + true, + "ie", + "ie" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 2451855113324595828, + 18220665665570411090, + null, + null, + 120, + 130, + 119, + 129, + 24, + 26, + true, + "name field", + "name field" + ], + [ + "numval", + "ival", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 17767354399704235161, + 13902073100028876379, + null, + null, + 148, + 149, + 147, + 148, + 29, + 30, + true, + "1", + "1" + ], + [ + "sentence", + "proper", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 8544327399273637150, + 76400959495078138, + null, + null, + 152, + 248, + 151, + 247, + 32, + 49, + true, + "A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", + "A more sophisticated approach is to use word embeddings to identify if two concepts are similar." + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 8980863917750970521, + 8109873253237463962, + null, + null, + 159, + 181, + 158, + 180, + 34, + 36, + true, + "sophisticated approach", + "sophisticated approach" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 16942949857064565838, + 2076435900086379767, + null, + null, + 192, + 207, + 191, + 206, + 39, + 41, + true, + "word embeddings", + "word embeddings" + ], + [ + "term", + "single-term", + 4925537010788978399, + "TEXT", + "#/texts/158", + 1.0, + 14652282388618227426, + 11300706950781769100, + null, + null, + 227, + 235, + 226, + 234, + 45, + 46, + true, + "concepts", + "concepts" + ], + [ + "sentence", + "improper", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 15441160910541482490, + 16703317440425394779, + null, + null, + 0, + 2, + 0, + 2, + 0, + 1, + true, + "**", + "**" + ], + [ + "sentence", + "proper", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 12200322802088853735, + 5911179317975529042, + null, + null, + 3, + 70, + 3, + 70, + 1, + 17, + true, + "For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", + "For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb." + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 952100816256133417, + 13213635506632373675, + null, + null, + 7, + 18, + 7, + 18, + 2, + 4, + true, + "example Neo", + "example Neo" + ], + [ + "expression", + "wtoken-concatenation", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 329104162105779366, + 13727282245178536763, + null, + null, + 15, + 20, + 15, + 20, + 3, + 6, + true, + "Neo4J", + "Neo4J" + ], + [ + "numval", + "ival", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 17767354399704235156, + 1305421191768306174, + null, + null, + 18, + 19, + 18, + 19, + 4, + 5, + true, + "4", + "4" + ], + [ + "term", + "enum-term-mark-4", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 3801746241056215150, + 5355237089115374373, + null, + null, + 19, + 69, + 19, + 69, + 5, + 16, + true, + "J, Titan, JanusGraph, Amazon Neptune, and Arangodb", + "J, Titan, JanusGraph, Amazon Neptune, and Arangodb" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 329104161841320944, + 13364385693051315282, + null, + null, + 22, + 27, + 22, + 27, + 7, + 8, + true, + "Titan", + "Titan" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 1737775650888870515, + 5145686494756741983, + null, + null, + 29, + 39, + 29, + 39, + 9, + 10, + true, + "JanusGraph", + "JanusGraph" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 18066135526428419828, + 16363518137721762265, + null, + null, + 41, + 55, + 41, + 55, + 11, + 13, + true, + "Amazon Neptune", + "Amazon Neptune" + ], + [ + "term", + "single-term", + 16552665876195410077, + "TEXT", + "#/texts/159", + 1.0, + 14650296439291036599, + 17985905875417800583, + null, + null, + 61, + 69, + 61, + 69, + 15, + 16, + true, + "Arangodb", + "Arangodb" + ], + [ + "sentence", + "improper", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 15441160910540903299, + 12657466972806319238, + null, + null, + 0, + 6, + 0, + 2, + 0, + 1, + true, + "\u2020\u2020", + "\u2020\u2020" + ], + [ + "sentence", + "proper", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 12617989556159965278, + 16648537282536463551, + null, + null, + 7, + 211, + 3, + 207, + 1, + 65, + true, + "This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", + "This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)." + ], + [ + "term", + "single-term", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 3927729088961860971, + 10861366598444773863, + null, + null, + 12, + 31, + 8, + 27, + 2, + 4, + true, + "memory architecture", + "memory architecture" + ], + [ + "term", + "single-term", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 329104161841320944, + 6995505371408985384, + null, + null, + 58, + 63, + 54, + 59, + 8, + 9, + true, + "Titan", + "Titan" + ], + [ + "parenthesis", + "round brackets", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 253594065264500809, + 15498824685726423077, + null, + null, + 64, + 128, + 60, + 124, + 9, + 33, + true, + "(http://s3.thinkaurelius.com/docs/titan/current/data-model.html)", + "(http://s3.thinkaurelius.com/docs/titan/current/data-model.html)" + ], + [ + "link", + "url", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 3438649888016089446, + 14315872303660489441, + null, + null, + 65, + 127, + 61, + 123, + 10, + 32, + true, + "http://s3.thinkaurelius.com/docs/titan/current/data-model.html", + "http://s3.thinkaurelius.com/docs/titan/current/data-model.html" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 17767354399704235163, + 14663762662264921246, + null, + null, + 73, + 74, + 69, + 70, + 15, + 16, + true, + "3", + "3" + ], + [ + "expression", + "wtoken-concatenation", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 329104162105779366, + 6977200025242982444, + null, + null, + 133, + 138, + 129, + 134, + 34, + 37, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 12178341415896300292, + 967785586605209129, + null, + null, + 133, + 136, + 129, + 132, + 34, + 35, + true, + "Neo", + "Neo" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 17767354399704235156, + 14663762663007797994, + null, + null, + 136, + 137, + 132, + 133, + 35, + 36, + true, + "4", + "4" + ], + [ + "parenthesis", + "round brackets", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 2281494353586706787, + 8157085761115684525, + null, + null, + 139, + 210, + 135, + 206, + 37, + 64, + true, + "(http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)", + "(http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html)" + ], + [ + "link", + "url", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 9361941850829391161, + 1324878578738734655, + null, + null, + 140, + 209, + 136, + 205, + 38, + 63, + true, + "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html", + "http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html" + ], + [ + "numval", + "year", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 389609625548777059, + 14748978429801291102, + null, + null, + 178, + 182, + 174, + 178, + 52, + 53, + true, + "2015", + "2015" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 15441160910541481913, + 12659057306413090614, + null, + null, + 183, + 185, + 179, + 181, + 54, + 55, + true, + "02", + "02" + ], + [ + "numval", + "ival", + 17579390613842440572, + "TEXT", + "#/texts/160", + 1.0, + 17767354399704235156, + 14663762663007808920, + null, + null, + 189, + 190, + 185, + 186, + 57, + 58, + true, + "4", + "4" + ], + [ + "sentence", + "improper", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 15713827668903361733, + 72772065845729394, + null, + null, + 0, + 156, + 0, + 152, + 0, + 40, + true, + "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", + "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms" + ], + [ + "expression", + "wtoken-concatenation", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 329104162105779366, + 17682593486665884844, + null, + null, + 16, + 21, + 12, + 17, + 3, + 6, + true, + "Neo4J", + "Neo4J" + ], + [ + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 12178341415896300292, + 17809976417017763541, + null, + null, + 16, + 19, + 12, + 15, + 3, + 4, + true, + "Neo", + "Neo" + ], + [ + "numval", + "ival", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 17767354399704235156, + 17688058591094674309, + null, + null, + 19, + 20, + 15, + 16, + 4, + 5, + true, + "4", + "4" + ], + [ + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 6165957175602596780, + 12883719775212934404, + null, + null, + 27, + 36, + 23, + 32, + 8, + 9, + true, + "reference", + "reference" + ], + [ + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 17930183089767229669, + 15389002456544844346, + null, + null, + 68, + 99, + 64, + 95, + 15, + 19, + true, + "popular graph database solution", + "popular graph database solution" + ], + [ + "link", + "url", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 12568677210829628871, + 1680746501251640588, + null, + null, + 105, + 139, + 101, + 135, + 21, + 35, + true, + "https://db-engines.com/en/ranking_", + "https://db-engines.com/en/ranking_" + ], + [ + "expression", + "wtoken-concatenation", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 12568677210829628871, + 1680746501251640588, + null, + null, + 105, + 139, + 101, + 135, + 21, + 35, + true, + "https://db-engines.com/en/ranking_", + "https://db-engines.com/en/ranking_" + ], + [ + "term", + "single-term", + 722212543953276862, + "TEXT", + "#/texts/161", + 1.0, + 2831507266554097914, + 1808153544976831155, + null, + null, + 146, + 156, + 142, + 152, + 37, + 40, + true, + "graph+dbms", + "graph+dbms" + ], + [ + "sentence", + "improper", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 7449211522826545008, + 18401885558501803202, + null, + null, + 0, + 25, + 0, + 23, + 0, + 10, + true, + "\u00a7\u00a7 http://graph500.org/", + "\u00a7\u00a7 http://graph500.org/" + ], + [ + "reference", + "authors", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 15441160910541474145, + 13051332902755974487, + null, + null, + 0, + 4, + 0, + 2, + 0, + 1, + true, + "\u00a7\u00a7", + "\u00a7\u00a7" + ], + [ + "reference", + "url", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 1244385257359010144, + 3127203609822040452, + null, + null, + 5, + 25, + 3, + 23, + 1, + 10, + true, + "http://graph500.org/", + "http://graph500.org/" + ], + [ + "term", + "single-term", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 389609625695173007, + 3836236615687027220, + null, + null, + 5, + 9, + 3, + 7, + 1, + 2, + true, + "http", + "http" + ], + [ + "term", + "single-term", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 329104159211247965, + 10630887676941884603, + null, + null, + 12, + 17, + 10, + 15, + 5, + 6, + true, + "graph", + "graph" + ], + [ + "term", + "single-term", + 11085577343317113173, + "TEXT", + "#/texts/162", + 1.0, + 389609625618846162, + 3823428058951951811, + null, + null, + 21, + 25, + 19, + 23, + 8, + 10, + true, + "org/", + "org/" + ], + [ + "sentence", + "improper", + 1792096630133661292, + "TEXT", + "#/texts/163", + 1.0, + 16747146533825186967, + 2165348395015827092, + null, + null, + 0, + 54, + 0, + 52, + 0, + 18, + true, + "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", + "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html" + ], + [ + "reference", + "reference-number", + 1792096630133661292, + "TEXT", + "#/texts/163", + 1.0, + 15441160910541473069, + 11916476354364763757, + null, + null, + 0, + 4, + 0, + 2, + 0, + 1, + true, + "\u00b6\u00b6", + "\u00b6\u00b6" + ], + [ + "reference", + "url", + 1792096630133661292, + "TEXT", + "#/texts/163", + 1.0, + 773494417653844359, + 2919336056783602673, + null, + null, + 5, + 54, + 3, + 52, + 1, + 18, + true, + "https://snap.stanford.edu/data/higgs-twitter.html", + "https://snap.stanford.edu/data/higgs-twitter.html" + ], + [ + "sentence", + "improper", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 12178341415896407674, + 16045680922123672072, + null, + null, + 0, + 3, + 0, + 3, + 0, + 1, + true, + "***", + "***" + ], + [ + "sentence", + "proper", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 8767715734654495558, + 12563470467547715840, + null, + null, + 4, + 61, + 4, + 61, + 1, + 13, + true, + "We assume the weight can be represented by a float value.", + "We assume the weight can be represented by a float value." + ], + [ + "term", + "single-term", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 16381206557786164800, + 5728702803374294286, + null, + null, + 18, + 24, + 18, + 24, + 4, + 5, + true, + "weight", + "weight" + ], + [ + "term", + "single-term", + 11462638369524745676, + "TEXT", + "#/texts/164", + 1.0, + 1473558314070085366, + 13523311624596995819, + null, + null, + 49, + 60, + 49, + 60, + 10, + 12, + true, + "float value", + "float value" + ], + [ + "sentence", + "improper", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 4512570954370983408, + 11763158631698282386, + null, + null, + 0, + 75, + 0, + 69, + 0, + 23, + true, + "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", + "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/" + ], + [ + "reference", + "reference-number", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 12178341417198250328, + 1575262081256116873, + null, + null, + 0, + 9, + 0, + 3, + 0, + 1, + true, + "\u2020\u2020\u2020", + "\u2020\u2020\u2020" + ], + [ + "reference", + "url", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 14268042929131437441, + 234824939381677632, + null, + null, + 10, + 75, + 4, + 69, + 1, + 23, + true, + "https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", + "https://neo4j.com/developer/guide-sizing-and-hardware-calculator/" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 329104161533497127, + 16180224231918255666, + null, + null, + 10, + 15, + 4, + 9, + 1, + 2, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 3943595989042214060, + 16915067796660432078, + null, + null, + 24, + 37, + 18, + 31, + 9, + 12, + true, + "com/developer", + "com/developer" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 14637910066475074126, + 6114961828553378919, + null, + null, + 55, + 63, + 49, + 57, + 19, + 20, + true, + "hardware", + "hardware" + ], + [ + "term", + "single-term", + 16611805225457383637, + "TEXT", + "#/texts/165", + 1.0, + 2879593163591796188, + 5335026245912853509, + null, + null, + 64, + 74, + 58, + 68, + 21, + 22, + true, + "calculator", + "calculator" + ], + [ + "sentence", + "improper", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 16922240937803157180, + 3329452043224775053, + null, + null, + 0, + 43, + 0, + 37, + 0, + 11, + true, + "\u2021\u2021\u2021 https://www.naturalearthdata.com/", + "\u2021\u2021\u2021 https://www.naturalearthdata.com/" + ], + [ + "reference", + "reference-number", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 12178341417198254221, + 18213045800656724647, + null, + null, + 0, + 9, + 0, + 3, + 0, + 1, + true, + "\u2021\u2021\u2021", + "\u2021\u2021\u2021" + ], + [ + "reference", + "url", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 10760936391898733584, + 8275004636990824295, + null, + null, + 10, + 43, + 4, + 37, + 1, + 11, + true, + "https://www.naturalearthdata.com/", + "https://www.naturalearthdata.com/" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 329104161533497127, + 17768638491100025109, + null, + null, + 10, + 15, + 4, + 9, + 1, + 2, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 12178341415895524451, + 18051100498086497778, + null, + null, + 18, + 21, + 12, + 15, + 5, + 6, + true, + "www", + "www" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 2943004857435312037, + 9330444828971529586, + null, + null, + 22, + 38, + 16, + 32, + 7, + 8, + true, + "naturalearthdata", + "naturalearthdata" + ], + [ + "term", + "single-term", + 1531505125666754945, + "TEXT", + "#/texts/166", + 1.0, + 389609625695971718, + 4325380352130131677, + null, + null, + 39, + 43, + 33, + 37, + 9, + 11, + true, + "com/", + "com/" + ], + [ + "sentence", + "improper", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 2845896203864732456, + 4760469342904968768, + null, + null, + 0, + 36, + 0, + 33, + 0, + 11, + true, + "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", + "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/" + ], + [ + "reference", + "reference-number", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 12178341415889410024, + 11239483387003711537, + null, + null, + 0, + 6, + 0, + 3, + 0, + 1, + true, + "\u00a7\u00a7\u00a7", + "\u00a7\u00a7\u00a7" + ], + [ + "reference", + "url", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 449425663079441853, + 5532800466031663479, + null, + null, + 7, + 36, + 4, + 33, + 1, + 11, + true, + "https://www.ccreservoirs.com/", + "https://www.ccreservoirs.com/" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 329104161533497127, + 4064657654566889450, + null, + null, + 7, + 12, + 4, + 9, + 1, + 2, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 12178341415895524451, + 11239821040174356665, + null, + null, + 15, + 18, + 12, + 15, + 5, + 6, + true, + "www", + "www" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 4898272711883537501, + 319067657400806549, + null, + null, + 19, + 31, + 16, + 28, + 7, + 8, + true, + "ccreservoirs", + "ccreservoirs" + ], + [ + "term", + "single-term", + 15684389308320953629, + "TEXT", + "#/texts/167", + 1.0, + 389609625695971718, + 13520393129950967142, + null, + null, + 32, + 36, + 29, + 33, + 9, + 11, + true, + "com/", + "com/" + ], + [ + "sentence", + "improper", + 14590754343934702701, + "TEXT", + "#/texts/168", + 1.0, + 1858797456585454232, + 2809842144121602219, + null, + null, + 0, + 10, + 0, + 10, + 0, + 1, + true, + "REFERENCES", + "REFERENCES" + ], + [ + "reference", + "authors", + 14590754343934702701, + "TEXT", + "#/texts/168", + 1.0, + 1858797456585454232, + 2809842144121602219, + null, + null, + 0, + 10, + 0, + 10, + 0, + 1, + true, + "REFERENCES", + "REFERENCES" + ], + [ + "term", + "single-term", + 14590754343934702701, + "TEXT", + "#/texts/168", + 1.0, + 1858797456585454232, + 2809842144121602219, + null, + null, + 0, + 10, + 0, + 10, + 0, + 1, + true, + "REFERENCES", + "REFERENCES" + ], + [ + "sentence", + "improper", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 15441160910541481980, + 8386387568042747678, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "1.", + "1." + ], + [ + "reference", + "reference-number", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 17767354399704235161, + 16208788960124925204, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "1", + "1." + ], + [ + "sentence", + "proper", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 2064192796631964770, + 16259707831430260005, + null, + null, + 3, + 137, + 3, + 137, + 2, + 29, + true, + "Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale.", + "Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale." + ], + [ + "reference", + "authors", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 14045775576648193325, + 8244242289281145129, + null, + null, + 3, + 52, + 3, + 52, + 2, + 15, + true, + "Staar Peter WJ, Michele D, Christoph A, Costas B", + "Staar Peter WJ, Michele D, Christoph A, Costas B." + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 11879540473470058199, + 12427853451193245392, + null, + null, + 3, + 17, + 3, + 17, + 2, + 5, + true, + "Staar Peter WJ", + "Staar Peter WJ" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 6613162031266505134, + 16138057201536909006, + null, + null, + 19, + 28, + 19, + 28, + 6, + 8, + true, + "Michele D", + "Michele D" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 4457167794784606628, + 16487730286724222122, + null, + null, + 30, + 41, + 30, + 41, + 9, + 11, + true, + "Christoph A", + "Christoph A" + ], + [ + "name", + "person-name", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 455015420489078976, + 1285989015139281970, + null, + null, + 43, + 59, + 43, + 59, + 12, + 16, + true, + "Costas B Corpus", + "Costas B. Corpus" + ], + [ + "reference", + "title", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 10495776784264289684, + 6718213806780973142, + null, + null, + 53, + 146, + 53, + 146, + 15, + 33, + true, + "Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18", + "Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18." + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 7881558880483647069, + 4390339138058947972, + null, + null, + 53, + 78, + 53, + 78, + 15, + 18, + true, + "Corpus conversion service", + "Corpus conversion service" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 8106464587473865376, + 4936495746156049501, + null, + null, + 82, + 89, + 82, + 89, + 20, + 21, + true, + "machine", + "machine" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 14814125365076808131, + 17712433670698462707, + null, + null, + 99, + 107, + 99, + 107, + 22, + 23, + true, + "platform", + "platform" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 6167933651658664291, + 12661206916294760912, + null, + null, + 118, + 127, + 118, + 127, + 25, + 26, + true, + "documents", + "documents" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 329104161785194305, + 16612919954372115180, + null, + null, + 131, + 136, + 131, + 136, + 27, + 28, + true, + "scale", + "scale" + ], + [ + "sentence", + "proper", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 14650432707062542657, + 11273901130686106136, + null, + null, + 138, + 146, + 138, + 146, + 29, + 33, + true, + "KDD '18.", + "KDD '18." + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 12178341415896253943, + 16661690143811416648, + null, + null, + 138, + 141, + 138, + 141, + 29, + 30, + true, + "KDD", + "KDD" + ], + [ + "sentence", + "proper", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 5922527567909474023, + 10848884210820586202, + null, + null, + 147, + 179, + 147, + 179, + 33, + 46, + true, + "New York, NY: ACM; 2018:774-782.", + "New York, NY: ACM; 2018:774-782." + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 14650948201816210252, + 5761777774481409935, + null, + null, + 147, + 155, + 147, + 155, + 33, + 35, + true, + "New York", + "New York" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 15441160910541487804, + 8386387571143486082, + null, + null, + 157, + 159, + 157, + 159, + 36, + 37, + true, + "NY", + "NY" + ], + [ + "term", + "single-term", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 12178341415896228980, + 16661682738511655292, + null, + null, + 161, + 164, + 161, + 164, + 38, + 39, + true, + "ACM", + "ACM" + ], + [ + "reference", + "date", + 10480452763767134455, + "TEXT", + "#/texts/169", + 1.0, + 15683444325968468739, + 16996762089080527682, + null, + null, + 166, + 179, + 166, + 179, + 40, + 46, + true, + "2018:774-782", + "2018:774-782." + ], + [ + "sentence", + "improper", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 15441160910541481780, + 9679667702159864047, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "2.", + "2." + ], + [ + "reference", + "reference-number", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 17767354399704235162, + 7639029136784882064, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "2", + "2." + ], + [ + "sentence", + "proper", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 5941740972901141891, + 8159805389130902539, + null, + null, + 3, + 130, + 3, + 130, + 2, + 30, + true, + "Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance.", + "Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance." + ], + [ + "reference", + "authors", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 4357204087924678948, + 11503245573532489830, + null, + null, + 3, + 42, + 3, + 42, + 2, + 15, + true, + "Staar Peter WJ, Kl BP, Roxana I, et al", + "Staar Peter WJ, Kl BP, Roxana I, et al." + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 11879540473470058199, + 6818801233014041471, + null, + null, + 3, + 17, + 3, + 17, + 2, + 5, + true, + "Staar Peter WJ", + "Staar Peter WJ" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 329104159232588720, + 1186563503698797045, + null, + null, + 19, + 24, + 19, + 24, + 6, + 8, + true, + "Kl BP", + "Kl BP" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 14652187939873997159, + 718674333250886747, + null, + null, + 26, + 34, + 26, + 34, + 9, + 11, + true, + "Roxana I", + "Roxana I" + ], + [ + "expression", + "common", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 329104162180805867, + 2101201624583688644, + null, + null, + 36, + 42, + 36, + 42, + 12, + 15, + true, + "et al", + "et al." + ], + [ + "reference", + "title", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 7105706713138331748, + 8882313339767931654, + null, + null, + 43, + 130, + 43, + 130, + 15, + 30, + true, + "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance", + "Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance." + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 12294323379189513058, + 6497780854465852359, + null, + null, + 43, + 60, + 43, + 60, + 15, + 17, + true, + "Stochastic Matrix", + "Stochastic Matrix" + ], + [ + "name", + "name-concatenation", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 4549168941779565045, + 3298984056937140542, + null, + null, + 54, + 69, + 54, + 69, + 16, + 19, + true, + "Matrix-Function", + "Matrix-Function" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 8793329599149202578, + 318724713861878505, + null, + null, + 61, + 80, + 61, + 80, + 18, + 20, + true, + "Function Estimators", + "Function Estimators" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 9598948590671100886, + 9715436593583823660, + null, + null, + 82, + 94, + 82, + 94, + 21, + 23, + true, + "Scalable Big", + "Scalable Big" + ], + [ + "name", + "name-concatenation", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 14650423007673892384, + 14713090862316278550, + null, + null, + 91, + 99, + 91, + 99, + 22, + 25, + true, + "Big-Data", + "Big-Data" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 9448405554431222338, + 2183763151164676504, + null, + null, + 95, + 107, + 95, + 107, + 24, + 26, + true, + "Data Kernels", + "Data Kernels" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 4824230233499551867, + 7853881416888487608, + null, + null, + 113, + 129, + 113, + 129, + 27, + 29, + true, + "High Performance", + "High Performance" + ], + [ + "sentence", + "proper", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 14086539689317188783, + 9623126981639275921, + null, + null, + 131, + 163, + 131, + 163, + 30, + 42, + true, + "Chicago, IL: IEEE; 2016:812-821.", + "Chicago, IL: IEEE; 2016:812-821." + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 8106350741667376964, + 2037770047407614341, + null, + null, + 131, + 138, + 131, + 138, + 30, + 31, + true, + "Chicago", + "Chicago" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 15441160910541480320, + 9679667231859700756, + null, + null, + 140, + 142, + 140, + 142, + 32, + 33, + true, + "IL", + "IL" + ], + [ + "term", + "single-term", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 389609625537951687, + 17963625958813759677, + null, + null, + 144, + 148, + 144, + 148, + 34, + 35, + true, + "IEEE", + "IEEE" + ], + [ + "reference", + "date", + 11866471329779366855, + "TEXT", + "#/texts/170", + 1.0, + 325347433255123998, + 9431696322833619114, + null, + null, + 150, + 163, + 150, + 163, + 36, + 42, + true, + "2016:812-821", + "2016:812-821." + ], + [ + "sentence", + "improper", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 15441160910541481845, + 8041722171934135301, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "3.", + "3." + ], + [ + "reference", + "reference-number", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 17767354399704235163, + 13510159049290326505, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "3", + "3." + ], + [ + "sentence", + "proper", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 10366826046347151057, + 3201175641693388735, + null, + null, + 3, + 140, + 3, + 140, + 2, + 28, + true, + "Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries.", + "Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries." + ], + [ + "reference", + "authors", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 9243870653745040564, + 2494378156442366016, + null, + null, + 3, + 43, + 3, + 43, + 2, + 16, + true, + "Matteo M, Christoph A, Val'ery W, et al", + "Matteo M, Christoph A, Val'ery W, et al." + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 14650311461945683358, + 1978144735469983705, + null, + null, + 3, + 11, + 3, + 11, + 2, + 4, + true, + "Matteo M", + "Matteo M" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 4457167794784606628, + 3737697229009384388, + null, + null, + 13, + 24, + 13, + 24, + 5, + 7, + true, + "Christoph A", + "Christoph A" + ], + [ + "expression", + "wtoken-concatenation", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 8106351859305413568, + 12876005663384173407, + null, + null, + 26, + 33, + 26, + 33, + 8, + 11, + true, + "Val'ery", + "Val'ery" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 12178341415895534152, + 17785966595483797392, + null, + null, + 26, + 29, + 26, + 29, + 8, + 9, + true, + "Val", + "Val" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 329104162291037424, + 17350054630895668920, + null, + null, + 30, + 35, + 30, + 35, + 10, + 12, + true, + "ery W", + "ery W" + ], + [ + "expression", + "common", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 329104162180805867, + 17409365616313413437, + null, + null, + 37, + 43, + 37, + 43, + 13, + 16, + true, + "et al", + "et al." + ], + [ + "reference", + "title", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 14518759528420507379, + 35296972575901154, + null, + null, + 44, + 140, + 44, + 140, + 16, + 28, + true, + "An information extraction and knowledge graph platform for accelerating biochemical discoveries", + "An information extraction and knowledge graph platform for accelerating biochemical discoveries." + ], + [ + "term", + "enum-term-mark-2", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 16358141361454762264, + 16975235676379792590, + null, + null, + 47, + 98, + 47, + 98, + 17, + 23, + true, + "information extraction and knowledge graph platform", + "information extraction and knowledge graph platform" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 8220196561360771086, + 11976237431337447962, + null, + null, + 47, + 69, + 47, + 69, + 17, + 19, + true, + "information extraction", + "information extraction" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 9096096466746800436, + 299601853962247456, + null, + null, + 74, + 98, + 74, + 98, + 20, + 23, + true, + "knowledge graph platform", + "knowledge graph platform" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 16380427451761946440, + 1088776306081422918, + null, + null, + 116, + 139, + 116, + 139, + 25, + 27, + true, + "biochemical discoveries", + "biochemical discoveries" + ], + [ + "sentence", + "proper", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 4521272801426400378, + 9677593514014566176, + null, + null, + 141, + 168, + 141, + 168, + 28, + 38, + true, + "ArXiv.abs/1907.08400; 2019.", + "ArXiv.abs/1907.08400; 2019." + ], + [ + "expression", + "wtoken-concatenation", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 11904308365999439423, + 5480725305735692275, + null, + null, + 141, + 161, + 141, + 161, + 28, + 35, + true, + "ArXiv.abs/1907.08400", + "ArXiv.abs/1907.08400" + ], + [ + "reference", + "journal", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 7543597897356589805, + 187532807533800461, + null, + null, + 141, + 151, + 141, + 151, + 28, + 32, + true, + "ArXiv.abs/", + "ArXiv.abs/" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 329104162009513145, + 17357826688115480551, + null, + null, + 141, + 146, + 141, + 146, + 28, + 29, + true, + "ArXiv", + "ArXiv" + ], + [ + "term", + "single-term", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 12178341415895572042, + 17785966987407051849, + null, + null, + 147, + 150, + 147, + 150, + 30, + 31, + true, + "abs", + "abs" + ], + [ + "reference", + "volume", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 948495657295850540, + 12052824091433651138, + null, + null, + 151, + 161, + 151, + 161, + 32, + 35, + true, + "1907.08400", + "1907.08400" + ], + [ + "reference", + "date", + 6016885898370676469, + "TEXT", + "#/texts/171", + 1.0, + 16381206542172555288, + 10693536807570486685, + null, + null, + 161, + 168, + 161, + 168, + 35, + 38, + true, + "; 2019", + "; 2019." + ], + [ + "sentence", + "improper", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15441160910541486262, + 13393766537274350374, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "4.", + "4." + ], + [ + "reference", + "reference-number", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 17767354399704235156, + 2787669627718018158, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "4", + "4." + ], + [ + "sentence", + "proper", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 18201779004646765015, + 6525534015679273683, + null, + null, + 3, + 176, + 3, + 176, + 2, + 38, + true, + "Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019).", + "Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019)." + ], + [ + "reference", + "authors", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8145380721974590875, + 8036423230253362696, + null, + null, + 3, + 51, + 3, + 51, + 2, + 17, + true, + "Paolo R, Marco P, Floriana B, Peter S, Costas B", + "Paolo R, Marco P, Floriana B, Peter S, Costas B." + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352039693059414, + 189526913306248274, + null, + null, + 3, + 10, + 3, + 10, + 2, + 4, + true, + "Paolo R", + "Paolo R" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106471247241844081, + 12829126084417792103, + null, + null, + 12, + 19, + 12, + 19, + 5, + 7, + true, + "Marco P", + "Marco P" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15356089124994678984, + 18000216761919637454, + null, + null, + 21, + 31, + 21, + 31, + 8, + 10, + true, + "Floriana B", + "Floriana B" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352035144611657, + 2775049790770760163, + null, + null, + 33, + 40, + 33, + 40, + 11, + 13, + true, + "Peter S", + "Peter S" + ], + [ + "name", + "person-name", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8085653376282374091, + 8263479519718862087, + null, + null, + 42, + 63, + 42, + 63, + 14, + 18, + true, + "Costas B Application", + "Costas B. Application" + ], + [ + "reference", + "title", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 89727683796184421, + 11248393883266780918, + null, + null, + 52, + 223, + 52, + 223, + 17, + 44, + true, + "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition &", + "Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition &" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 10490888699425605498, + 7759298453024144101, + null, + null, + 67, + 92, + 67, + 92, + 19, + 21, + true, + "Geocognitive Technologies", + "Geocognitive Technologies" + ], + [ + "term", + "enum-term-mark-4", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 16886334362487979110, + 15663185702252571998, + null, + null, + 96, + 129, + 96, + 129, + 22, + 27, + true, + "Basin & Petroleum System Analyses", + "Basin & Petroleum System Analyses" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 329104162065456823, + 743590677500510925, + null, + null, + 96, + 101, + 96, + 101, + 22, + 23, + true, + "Basin", + "Basin" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 6282754256473030155, + 5108804280048681346, + null, + null, + 104, + 129, + 104, + 129, + 24, + 27, + true, + "Petroleum System Analyses", + "Petroleum System Analyses" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 329104161846736203, + 3663224801483387974, + null, + null, + 131, + 136, + 131, + 136, + 28, + 29, + true, + "Texas", + "Texas" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8106352717733900272, + 2525138794754357211, + null, + null, + 138, + 145, + 138, + 145, + 30, + 31, + true, + "Society", + "Society" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 1957667287048702282, + 14636254714870854935, + null, + null, + 149, + 168, + 149, + 168, + 32, + 34, + true, + "Petroleum Engineers", + "Petroleum Engineers" + ], + [ + "sentence", + "proper", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 8047292080261477252, + 3759318704396486843, + null, + null, + 177, + 256, + 177, + 256, + 38, + 54, + true, + "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10.", + "Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10." + ], + [ + "term", + "enum-term-mark-4", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 5050260885807546595, + 15713736725428197202, + null, + null, + 177, + 234, + 177, + 234, + 38, + 45, + true, + "Abu Dhabi International Petroleum Exhibition & Conference", + "Abu Dhabi International Petroleum Exhibition & Conference" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 1607524687542961615, + 11251152050070639401, + null, + null, + 177, + 221, + 177, + 221, + 38, + 43, + true, + "Abu Dhabi International Petroleum Exhibition", + "Abu Dhabi International Petroleum Exhibition" + ], + [ + "reference", + "conference", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 3847339587475413410, + 2047970562154800974, + null, + null, + 224, + 251, + 224, + 251, + 44, + 51, + true, + "Conference, Abu Dhabi, UAE", + "Conference, Abu Dhabi, UAE," + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 969963630422387313, + 12193752692984421564, + null, + null, + 224, + 234, + 224, + 234, + 44, + 45, + true, + "Conference", + "Conference" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 6563080480676520350, + 6473658907824821571, + null, + null, + 236, + 245, + 236, + 245, + 46, + 48, + true, + "Abu Dhabi", + "Abu Dhabi" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415895651112, + 16626961418662622345, + null, + null, + 247, + 250, + 247, + 250, + 49, + 50, + true, + "UAE", + "UAE" + ], + [ + "reference", + "pages", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415896216312, + 16626963629120408485, + null, + null, + 252, + 256, + 252, + 256, + 51, + 54, + true, + ":10", + ":10." + ], + [ + "expression", + "wtoken-concatenation", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415896216312, + 16626963629120408490, + null, + null, + 252, + 255, + 252, + 255, + 51, + 53, + true, + ":10", + ":10" + ], + [ + "reference", + "doi", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 11673547348366864691, + 4147257630836829657, + null, + null, + 257, + 292, + 257, + 292, + 54, + 70, + true, + "https://doi.org/10.2118/197610-MS", + "https://doi. org/10.2118/197610-MS." + ], + [ + "sentence", + "proper", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 5857244370669890274, + 17990747492643866277, + null, + null, + 257, + 269, + 257, + 269, + 54, + 60, + true, + "https://doi.", + "https://doi." + ], + [ + "expression", + "wtoken-concatenation", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 7742135058095281026, + 17571544217117981683, + null, + null, + 257, + 268, + 257, + 268, + 54, + 59, + true, + "https://doi", + "https://doi" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 329104161533497127, + 3569589394363709575, + null, + null, + 257, + 262, + 257, + 262, + 54, + 55, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415895452239, + 16627061552361506567, + null, + null, + 265, + 268, + 265, + 268, + 58, + 59, + true, + "doi", + "doi" + ], + [ + "sentence", + "improper", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15557566671061207768, + 230972920426777869, + null, + null, + 270, + 289, + 270, + 289, + 60, + 68, + true, + "org/10.2118/197610-", + "org/10.2118/197610-" + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415895623052, + 16626965306587567269, + null, + null, + 270, + 273, + 270, + 273, + 60, + 61, + true, + "org", + "org" + ], + [ + "sentence", + "proper", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 12178341415896278272, + 16627063529584884912, + null, + null, + 289, + 292, + 289, + 292, + 68, + 70, + true, + "MS.", + "MS." + ], + [ + "term", + "single-term", + 13946275785662847920, + "TEXT", + "#/texts/172", + 1.0, + 15441160910541480634, + 13393758459708113122, + null, + null, + 289, + 291, + 289, + 291, + 68, + 69, + true, + "MS", + "MS" + ], + [ + "sentence", + "improper", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 15441160910541486327, + 5428431164759035833, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "5.", + "5." + ], + [ + "reference", + "reference-number", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 17767354399704235157, + 9080683344301571168, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "5", + "5." + ], + [ + "sentence", + "proper", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 4850647667861134344, + 5505550372083259738, + null, + null, + 3, + 171, + 3, + 171, + 2, + 34, + true, + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016." + ], + [ + "reference", + "authors", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 4212509100547346489, + 17078400926856851527, + null, + null, + 3, + 55, + 3, + 55, + 2, + 17, + true, + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D", + "Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D." + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 3027248490321213074, + 16283814403211008850, + null, + null, + 3, + 14, + 3, + 14, + 2, + 4, + true, + "Guillaume L", + "Guillaume L" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 14650310996645589292, + 14357325801323977565, + null, + null, + 16, + 24, + 16, + 24, + 5, + 7, + true, + "Miguel B", + "Miguel B" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 6049415556904669075, + 4491667145265607561, + null, + null, + 26, + 35, + 26, + 35, + 8, + 10, + true, + "Sandeep S", + "Sandeep S" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 14650438760956024332, + 12941354247565292233, + null, + null, + 37, + 45, + 37, + 45, + 11, + 13, + true, + "Kazuya K", + "Kazuya K" + ], + [ + "name", + "person-name", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 9660792047811639733, + 1337128772214092214, + null, + null, + 47, + 62, + 47, + 62, + 14, + 18, + true, + "Chris D Neural", + "Chris D. Neural" + ], + [ + "reference", + "title", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 16200640505386782750, + 16685934982266491450, + null, + null, + 56, + 165, + 56, + 165, + 17, + 32, + true, + "Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics;", + "Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics;" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 9764460566162632960, + 16642624291773848144, + null, + null, + 56, + 76, + 56, + 76, + 17, + 19, + true, + "Neural Architectures", + "Neural Architectures" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 9361732498059105399, + 13956982048443319080, + null, + null, + 81, + 105, + 81, + 105, + 20, + 23, + true, + "Named Entity Recognition", + "Named Entity Recognition" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 295551369126235776, + 10028312936701107065, + null, + null, + 107, + 121, + 107, + 121, + 24, + 26, + true, + "Stroudsburg PA", + "Stroudsburg PA" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 7719000109186773037, + 1029739931494122980, + null, + null, + 123, + 134, + 123, + 134, + 27, + 28, + true, + "Association", + "Association" + ], + [ + "term", + "single-term", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 16550803490317182276, + 1120983039058145469, + null, + null, + 139, + 164, + 139, + 164, + 29, + 31, + true, + "Computational Linguistics", + "Computational Linguistics" + ], + [ + "reference", + "date", + 7693798302433367973, + "TEXT", + "#/texts/173", + 1.0, + 389609625548777056, + 1791736220903901961, + null, + null, + 166, + 171, + 166, + 171, + 32, + 34, + true, + "2016", + "2016." + ], + [ + "sentence", + "improper", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 15441160910541481013, + 5942897417109902577, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "6.", + "6." + ], + [ + "reference", + "reference-number", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 17767354399704235158, + 2935027410945303088, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "6", + "6." + ], + [ + "sentence", + "proper", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 16149852804597872204, + 9077761784460652022, + null, + null, + 3, + 80, + 3, + 80, + 2, + 18, + true, + "Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs.", + "Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs." + ], + [ + "reference", + "authors", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 3481924782295664840, + 935617422453535067, + null, + null, + 3, + 25, + 3, + 25, + 2, + 9, + true, + "Chiu Jason PC, Eric N", + "Chiu Jason PC, Eric N." + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 12139207556299923335, + 12395232115938598978, + null, + null, + 3, + 16, + 3, + 16, + 2, + 5, + true, + "Chiu Jason PC", + "Chiu Jason PC" + ], + [ + "name", + "person-name", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 8669939107464889919, + 15575238431294334172, + null, + null, + 18, + 31, + 18, + 31, + 6, + 10, + true, + "Eric N Named", + "Eric N. Named" + ], + [ + "reference", + "title", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 16636370883913883252, + 5810162511985509682, + null, + null, + 26, + 80, + 26, + 80, + 9, + 18, + true, + "Named entity recognition with bidirectional LSTM-CNNs", + "Named entity recognition with bidirectional LSTM-CNNs." + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 6624594430573868561, + 9300875014556458820, + null, + null, + 32, + 50, + 32, + 50, + 10, + 12, + true, + "entity recognition", + "entity recognition" + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 16662373297777570178, + 8833856393204507228, + null, + null, + 56, + 74, + 56, + 74, + 13, + 15, + true, + "bidirectional LSTM", + "bidirectional LSTM" + ], + [ + "expression", + "word-concatenation", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 6627885913248971716, + 14160903326793315633, + null, + null, + 70, + 79, + 70, + 79, + 14, + 17, + true, + "LSTM-CNNs", + "LSTM-CNNs" + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 389609625526733162, + 1673278111907317242, + null, + null, + 75, + 79, + 75, + 79, + 16, + 17, + true, + "CNNs", + "CNNs" + ], + [ + "sentence", + "proper", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 329104162087785161, + 9946009003015376905, + null, + null, + 81, + 86, + 81, + 86, + 18, + 20, + true, + "TACL.", + "TACL." + ], + [ + "reference", + "journal", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 389609625541773713, + 1712767977156820575, + null, + null, + 81, + 86, + 81, + 86, + 18, + 20, + true, + "TACL", + "TACL." + ], + [ + "term", + "single-term", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 389609625541773713, + 1712767977156820574, + null, + null, + 81, + 85, + 81, + 85, + 18, + 19, + true, + "TACL", + "TACL" + ], + [ + "sentence", + "improper", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 900810462997696699, + 9296645351405634953, + null, + null, + 87, + 102, + 87, + 102, + 20, + 28, + true, + "2016;4:357-370.", + "2016;4:357-370." + ], + [ + "reference", + "date", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 329104147695661831, + 15059294784117209596, + null, + null, + 87, + 92, + 87, + 92, + 20, + 22, + true, + "2016;", + "2016;" + ], + [ + "reference", + "title", + 3109792572574236398, + "TEXT", + "#/texts/174", + 1.0, + 6498928726029246334, + 10334044460289682205, + null, + null, + 92, + 102, + 92, + 102, + 22, + 28, + true, + "4:357-370", + "4:357-370." + ], + [ + "sentence", + "improper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 15441160910541481076, + 14099067875649218598, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "7.", + "7." + ], + [ + "reference", + "reference-number", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 17767354399704235159, + 17892509173094146700, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "7", + "7." + ], + [ + "sentence", + "proper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 7307445048043317682, + 2555209552433482279, + null, + null, + 3, + 21, + 3, + 21, + 2, + 8, + true, + "Matthew H, Ines M.", + "Matthew H, Ines M." + ], + [ + "reference", + "authors", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 7850636613883620371, + 5463699822054275279, + null, + null, + 3, + 21, + 3, + 21, + 2, + 8, + true, + "Matthew H, Ines M", + "Matthew H, Ines M." + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 6611312511369759405, + 3019524304480366334, + null, + null, + 3, + 12, + 3, + 12, + 2, + 4, + true, + "Matthew H", + "Matthew H" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 16380809977742382038, + 12118911120655365706, + null, + null, + 14, + 20, + 14, + 20, + 5, + 7, + true, + "Ines M", + "Ines M" + ], + [ + "reference", + "title", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 8498785529703184960, + 117824391260336618, + null, + null, + 22, + 150, + 22, + 150, + 8, + 28, + true, + "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear", + "spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear." + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 329104161639064018, + 15555581995259937868, + null, + null, + 22, + 27, + 22, + 27, + 8, + 9, + true, + "spaCy", + "spaCy" + ], + [ + "sentence", + "improper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 12178341415895638165, + 5486425860276194881, + null, + null, + 22, + 25, + 22, + 25, + 8, + 8, + false, + "spa", + "spa" + ], + [ + "sentence", + "proper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 18422321729110645108, + 14077627261602009953, + null, + null, + 25, + 139, + 25, + 139, + 8, + 25, + true, + "Cy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing.", + "Cy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing." + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 3070945404202872591, + 4809079122368752762, + null, + null, + 31, + 47, + 31, + 47, + 11, + 13, + true, + "natural language", + "natural language" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 18136559691621189433, + 7438263735663259264, + null, + null, + 67, + 83, + 67, + 83, + 15, + 17, + true, + "bloom embeddings", + "bloom embeddings" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 5074039139067361158, + 2646875362836900663, + null, + null, + 85, + 114, + 85, + 114, + 18, + 21, + true, + "convolutional neural networks", + "convolutional neural networks" + ], + [ + "term", + "single-term", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 2536592842635865927, + 13814970629892288506, + null, + null, + 119, + 138, + 119, + 138, + 22, + 24, + true, + "incremental parsing", + "incremental parsing" + ], + [ + "sentence", + "proper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 15865958309310945968, + 6557910677090579622, + null, + null, + 140, + 150, + 140, + 150, + 25, + 28, + true, + "To appear.", + "To appear." + ], + [ + "sentence", + "improper", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 329104147695661623, + 6425989175071208113, + null, + null, + 151, + 156, + 151, + 156, + 28, + 30, + true, + "2017.", + "2017." + ], + [ + "reference", + "date", + 8111170387462350170, + "TEXT", + "#/texts/175", + 1.0, + 389609625548777057, + 14192492111179186413, + null, + null, + 151, + 156, + 151, + 156, + 28, + 30, + true, + "2017", + "2017." + ], + [ + "sentence", + "improper", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 15441160910541481399, + 8301553353386600029, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "8.", + "8." + ], + [ + "reference", + "reference-number", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 17767354399704235152, + 15651484829649486931, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "8", + "8." + ], + [ + "sentence", + "proper", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 14351638638233132553, + 9869358074812010297, + null, + null, + 3, + 138, + 3, + 138, + 2, + 32, + true, + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada.", + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada." + ], + [ + "reference", + "authors", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 5848297652864005955, + 13778801729762411640, + null, + null, + 3, + 54, + 3, + 54, + 2, + 18, + true, + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!", + "Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!)," + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 6627095272342846459, + 8960025720845820047, + null, + null, + 3, + 12, + 3, + 12, + 2, + 4, + true, + "Magoon LB", + "Magoon LB" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 6563582333827106756, + 4026322596752919867, + null, + null, + 14, + 23, + 14, + 23, + 5, + 7, + true, + "Hudson TL", + "Hudson TL" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 2902914387278523955, + 5025725615021492664, + null, + null, + 25, + 34, + 25, + 34, + 8, + 10, + true, + "Peters KE", + "Peters KE" + ], + [ + "name", + "person-name", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 8106396146029139849, + 4756990361978769041, + null, + null, + 33, + 41, + 33, + 41, + 9, + 12, + true, + "E Egret", + "E. Egret" + ], + [ + "expression", + "wtoken-concatenation", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 17069592624661941498, + 4631155417717592897, + null, + null, + 36, + 53, + 36, + 53, + 11, + 17, + true, + "Egret-Hibernia(!)", + "Egret-Hibernia(!)" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 14652303295027260495, + 4867258259954006452, + null, + null, + 42, + 50, + 42, + 50, + 13, + 14, + true, + "Hibernia", + "Hibernia" + ], + [ + "parenthesis", + "round brackets", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 12178341415896398102, + 5085731941478390906, + null, + null, + 50, + 53, + 50, + 53, + 14, + 17, + true, + "(!)", + "(!)" + ], + [ + "reference", + "title", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 8991166294068381652, + 13146587142049422196, + null, + null, + 55, + 138, + 55, + 138, + 18, + 32, + true, + "a significant petroleum system, northern Grand Banks area, offshore eastern Canada", + "a significant petroleum system, northern Grand Banks area, offshore eastern Canada." + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 2654341348145270052, + 13911007437444991428, + null, + null, + 57, + 85, + 57, + 85, + 19, + 22, + true, + "significant petroleum system", + "significant petroleum system" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 5020053208872345017, + 12778217125270746067, + null, + null, + 87, + 112, + 87, + 112, + 23, + 27, + true, + "northern Grand Banks area", + "northern Grand Banks area" + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 11433597316182704533, + 321742181372268229, + null, + null, + 123, + 137, + 123, + 137, + 29, + 31, + true, + "eastern Canada", + "eastern Canada" + ], + [ + "sentence", + "proper", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 3239699829819496083, + 8140375803971345791, + null, + null, + 139, + 162, + 139, + 162, + 32, + 38, + true, + "Am Assoc Pet Geol Bull.", + "Am Assoc Pet Geol Bull." + ], + [ + "reference", + "journal", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 14445748745948696227, + 6494504935180328365, + null, + null, + 139, + 162, + 139, + 162, + 32, + 38, + true, + "Am Assoc Pet Geol Bull", + "Am Assoc Pet Geol Bull." + ], + [ + "term", + "single-term", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 14445748745948696227, + 6494504935180328364, + null, + null, + 139, + 161, + 139, + 161, + 32, + 37, + true, + "Am Assoc Pet Geol Bull", + "Am Assoc Pet Geol Bull" + ], + [ + "sentence", + "improper", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 2669509315780110933, + 17452564295061618645, + null, + null, + 163, + 184, + 163, + 184, + 38, + 49, + true, + "2005;89(9):1203-1237.", + "2005;89(9):1203-1237." + ], + [ + "reference", + "date", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 12010959389695517137, + 15013988311129840871, + null, + null, + 163, + 184, + 163, + 184, + 38, + 49, + true, + "2005;89(9):1203-1237", + "2005;89(9):1203-1237." + ], + [ + "parenthesis", + "reference", + 14682702346227170925, + "TEXT", + "#/texts/176", + 1.0, + 12178341415896392564, + 5085929270105113532, + null, + null, + 170, + 173, + 170, + 173, + 41, + 44, + true, + "(9)", + "(9)" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/177", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "sentence", + "improper", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 15441160910541481462, + 3095595477306612046, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "9.", + "9." + ], + [ + "reference", + "reference-number", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 17767354399704235153, + 10433678415276841390, + null, + null, + 0, + 2, + 0, + 2, + 0, + 2, + true, + "9", + "9." + ], + [ + "sentence", + "proper", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 14211509953373686953, + 10442209004816950267, + null, + null, + 3, + 54, + 3, + 54, + 2, + 11, + true, + "Estrada E. Subgraph centrality in complex networks.", + "Estrada E. Subgraph centrality in complex networks." + ], + [ + "name", + "person-name", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 5032660681914123489, + 10411767668009775523, + null, + null, + 3, + 22, + 3, + 22, + 2, + 6, + true, + "Estrada E Subgraph", + "Estrada E. Subgraph" + ], + [ + "reference", + "authors", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 6557810835592781181, + 17946965941344362242, + null, + null, + 3, + 13, + 3, + 13, + 2, + 5, + true, + "Estrada E", + "Estrada E." + ], + [ + "reference", + "title", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 10002059539925749429, + 4038144589619849266, + null, + null, + 14, + 54, + 14, + 54, + 5, + 11, + true, + "Subgraph centrality in complex networks", + "Subgraph centrality in complex networks." + ], + [ + "term", + "single-term", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 13702393049667549173, + 5943650791086261175, + null, + null, + 14, + 33, + 14, + 33, + 5, + 7, + true, + "Subgraph centrality", + "Subgraph centrality" + ], + [ + "term", + "single-term", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 1651177076069931825, + 13122672563215344832, + null, + null, + 37, + 53, + 37, + 53, + 8, + 10, + true, + "complex networks", + "complex networks" + ], + [ + "sentence", + "proper", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 11914250565653684629, + 14122288949077854502, + null, + null, + 55, + 66, + 55, + 66, + 11, + 15, + true, + "Phys Rev E.", + "Phys Rev E." + ], + [ + "reference", + "journal", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 1821145667706451373, + 6349148037602643639, + null, + null, + 55, + 66, + 55, + 66, + 11, + 15, + true, + "Phys Rev E", + "Phys Rev E." + ], + [ + "term", + "single-term", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 1821145667706451373, + 6349148037602643636, + null, + null, + 55, + 65, + 55, + 65, + 11, + 14, + true, + "Phys Rev E", + "Phys Rev E" + ], + [ + "sentence", + "improper", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 13573598089500757206, + 6692458992266800770, + null, + null, + 67, + 85, + 67, + 85, + 15, + 24, + true, + "2005;71(5):056103.", + "2005;71(5):056103." + ], + [ + "reference", + "date", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 329104147695665975, + 9845650019423915667, + null, + null, + 67, + 72, + 67, + 72, + 15, + 17, + true, + "2005;", + "2005;" + ], + [ + "reference", + "volume", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 16380810009856206301, + 11521716509534155114, + null, + null, + 72, + 78, + 72, + 78, + 17, + 22, + true, + "71(5):", + "71(5):" + ], + [ + "parenthesis", + "reference", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 12178341415896395383, + 3095186558758793614, + null, + null, + 74, + 77, + 74, + 77, + 18, + 21, + true, + "(5)", + "(5)" + ], + [ + "reference", + "pages", + 11430385775112165283, + "TEXT", + "#/texts/178", + 1.0, + 16380805714058077749, + 2458865183124865563, + null, + null, + 78, + 85, + 78, + 85, + 22, + 24, + true, + "056103", + "056103." + ], + [ + "sentence", + "improper", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 12178341415896426716, + 2496381961233018859, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "10.", + "10." + ], + [ + "reference", + "reference-number", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15441160910541481982, + 2952327273286615866, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "10", + "10." + ], + [ + "sentence", + "proper", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15888965152791123369, + 13311487678580191662, + null, + null, + 4, + 38, + 4, + 38, + 2, + 9, + true, + "Estrada Ernesto, Higham Desmond J.", + "Estrada Ernesto, Higham Desmond J." + ], + [ + "reference", + "authors", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15943206817210566989, + 16687717442496902915, + null, + null, + 4, + 38, + 4, + 38, + 2, + 9, + true, + "Estrada Ernesto, Higham Desmond J", + "Estrada Ernesto, Higham Desmond J." + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 2628812302410383486, + 8225541491002394036, + null, + null, + 4, + 19, + 4, + 19, + 2, + 4, + true, + "Estrada Ernesto", + "Estrada Ernesto" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 9810881374821281499, + 15294715577751716659, + null, + null, + 21, + 37, + 21, + 37, + 5, + 8, + true, + "Higham Desmond J", + "Higham Desmond J" + ], + [ + "reference", + "date", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 389609625548777062, + 8937154938925174773, + null, + null, + 39, + 46, + 39, + 46, + 9, + 13, + true, + "2010", + "(2010)." + ], + [ + "parenthesis", + "reference", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 16380808315360989994, + 7590928242002916775, + null, + null, + 39, + 45, + 39, + 45, + 9, + 12, + true, + "(2010)", + "(2010)" + ], + [ + "sentence", + "improper", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 17767354399704235166, + 8049906976560456930, + null, + null, + 45, + 46, + 45, + 46, + 12, + 13, + true, + ".", + "." + ], + [ + "sentence", + "proper", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 13313338743045791386, + 13496281760238992122, + null, + null, + 47, + 100, + 47, + 100, + 13, + 20, + true, + "Network Properties Revealed through Matrix Functions.", + "Network Properties Revealed through Matrix Functions." + ], + [ + "reference", + "title", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 10343442203235089501, + 11301938714393369373, + null, + null, + 47, + 100, + 47, + 100, + 13, + 20, + true, + "Network Properties Revealed through Matrix Functions", + "Network Properties Revealed through Matrix Functions." + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 9529086943855412027, + 1909130811397866082, + null, + null, + 47, + 74, + 47, + 74, + 13, + 16, + true, + "Network Properties Revealed", + "Network Properties Revealed" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 14050323403523305703, + 1711773991294684512, + null, + null, + 83, + 99, + 83, + 99, + 17, + 19, + true, + "Matrix Functions", + "Matrix Functions" + ], + [ + "sentence", + "proper", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 12745772866621103425, + 14912721299200279248, + null, + null, + 101, + 131, + 101, + 131, + 20, + 33, + true, + "SIAM Review, 52, (4), 696-714.", + "SIAM Review, 52, (4), 696-714." + ], + [ + "reference", + "journal", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 2746419737099405232, + 18061106767070096393, + null, + null, + 101, + 113, + 101, + 113, + 20, + 23, + true, + "SIAM Review", + "SIAM Review," + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 2746419737099405232, + 18061106767070096394, + null, + null, + 101, + 112, + 101, + 112, + 20, + 22, + true, + "SIAM Review", + "SIAM Review" + ], + [ + "reference", + "volume", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 16380809986266457236, + 161102776712015127, + null, + null, + 114, + 122, + 114, + 122, + 23, + 29, + true, + "52, (4", + "52, (4)," + ], + [ + "parenthesis", + "reference", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 12178341415896395057, + 2497165307488647522, + null, + null, + 118, + 121, + 118, + 121, + 25, + 28, + true, + "(4)", + "(4)" + ], + [ + "reference", + "pages", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 8104408773920978895, + 9147525378271823462, + null, + null, + 123, + 131, + 123, + 131, + 29, + 33, + true, + "696-714", + "696-714." + ], + [ + "expression", + "wtoken-concatenation", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 8104408773920978895, + 9147525378271823463, + null, + null, + 123, + 130, + 123, + 130, + 29, + 32, + true, + "696-714", + "696-714" + ], + [ + "sentence", + "improper", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 10188858309180365192, + 12377323489588219996, + null, + null, + 132, + 168, + 132, + 168, + 33, + 49, + true, + "http://dx.doi.org/10.1137/090761070.", + "http://dx.doi.org/10.1137/090761070." + ], + [ + "reference", + "doi", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 16159594323378820687, + 15692242274322104013, + null, + null, + 132, + 168, + 132, + 168, + 33, + 49, + true, + "http://dx.doi.org/10.1137/090761070", + "http://dx.doi.org/10.1137/090761070." + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 389609625695173007, + 8776546935861356993, + null, + null, + 132, + 136, + 132, + 136, + 33, + 34, + true, + "http", + "http" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 15441160910541486860, + 2952325046422382730, + null, + null, + 139, + 141, + 139, + 141, + 37, + 38, + true, + "dx", + "dx" + ], + [ + "term", + "single-term", + 5825495964576843004, + "TEXT", + "#/texts/179", + 1.0, + 12178341415895623052, + 2496395224268980578, + null, + null, + 146, + 149, + 146, + 149, + 41, + 42, + true, + "org", + "org" + ], + [ + "sentence", + "improper", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 12178341415896426655, + 7596226664406524957, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "11.", + "11." + ], + [ + "reference", + "reference-number", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 15441160910541481983, + 11293846485728944319, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "11", + "11." + ], + [ + "sentence", + "proper", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 2199715623168261348, + 14344176115087650584, + null, + null, + 4, + 15, + 4, + 15, + 2, + 5, + true, + "Labs Redis.", + "Labs Redis." + ], + [ + "reference", + "authors", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 1413805758909278007, + 12182268615745487815, + null, + null, + 4, + 15, + 4, + 15, + 2, + 5, + true, + "Labs Redis", + "Labs Redis." + ], + [ + "term", + "single-term", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 1413805758909278007, + 12182268615745487814, + null, + null, + 4, + 14, + 4, + 14, + 2, + 4, + true, + "Labs Redis", + "Labs Redis" + ], + [ + "sentence", + "proper", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 9890391113606841714, + 7310122424657160613, + null, + null, + 16, + 44, + 16, + 44, + 5, + 11, + true, + "Benchmarking RedisGraph 1.0.", + "Benchmarking RedisGraph 1.0." + ], + [ + "reference", + "title", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 17216005724109731720, + 11993954726519740908, + null, + null, + 16, + 44, + 16, + 44, + 5, + 11, + true, + "Benchmarking RedisGraph 1.0", + "Benchmarking RedisGraph 1.0." + ], + [ + "term", + "single-term", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 5889193357245449519, + 7672393344946738743, + null, + null, + 16, + 39, + 16, + 39, + 5, + 7, + true, + "Benchmarking RedisGraph", + "Benchmarking RedisGraph" + ], + [ + "expression", + "wtoken-concatenation", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 12178341415896427355, + 7596226314134098818, + null, + null, + 40, + 43, + 40, + 43, + 7, + 10, + true, + "1.0", + "1.0" + ], + [ + "sentence", + "improper", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 329104147695662014, + 11537339699383207639, + null, + null, + 45, + 50, + 45, + 50, + 11, + 13, + true, + "2019.", + "2019." + ], + [ + "reference", + "date", + 5698421097735371040, + "TEXT", + "#/texts/180", + 1.0, + 389609625548777055, + 1517668227262464255, + null, + null, + 45, + 50, + 45, + 50, + 11, + 13, + true, + "2019", + "2019." + ], + [ + "sentence", + "improper", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 12178341415896426590, + 4180477249261114913, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "12.", + "12." + ], + [ + "reference", + "reference-number", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15441160910541481976, + 12703724519968684239, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "12", + "12." + ], + [ + "sentence", + "proper", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15754713894443025139, + 17869835566751337591, + null, + null, + 4, + 15, + 4, + 15, + 2, + 4, + true, + "TigerGraph.", + "TigerGraph." + ], + [ + "reference", + "authors", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15861880261780248619, + 9206162103335947230, + null, + null, + 4, + 15, + 4, + 15, + 2, + 4, + true, + "TigerGraph", + "TigerGraph." + ], + [ + "term", + "single-term", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 15861880261780248619, + 9206162103335947231, + null, + null, + 4, + 14, + 4, + 14, + 2, + 3, + true, + "TigerGraph", + "TigerGraph" + ], + [ + "sentence", + "proper", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 7946825277683884881, + 1230187338989102593, + null, + null, + 16, + 46, + 16, + 46, + 4, + 11, + true, + "Real-Time Deep Link Analytics.", + "Real-Time Deep Link Analytics." + ], + [ + "reference", + "title", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 17475892521501552303, + 8529795867214537155, + null, + null, + 16, + 46, + 16, + 46, + 4, + 11, + true, + "Real-Time Deep Link Analytics", + "Real-Time Deep Link Analytics." + ], + [ + "name", + "name-concatenation", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 5955741586810846236, + 15240428492191467486, + null, + null, + 16, + 25, + 16, + 25, + 4, + 7, + true, + "Real-Time", + "Real-Time" + ], + [ + "term", + "single-term", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 14317037945453024278, + 15123649660345785041, + null, + null, + 21, + 45, + 21, + 45, + 6, + 10, + true, + "Time Deep Link Analytics", + "Time Deep Link Analytics" + ], + [ + "sentence", + "improper", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 329104147695661814, + 7543078487534121494, + null, + null, + 47, + 52, + 47, + 52, + 11, + 13, + true, + "2018.", + "2018." + ], + [ + "reference", + "date", + 5870535063942256428, + "TEXT", + "#/texts/181", + 1.0, + 389609625548777054, + 3194806985827377521, + null, + null, + 47, + 52, + 47, + 52, + 11, + 13, + true, + "2018", + "2018." + ], + [ + "sentence", + "improper", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 12178341415896424072, + 14083466083102208723, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "13.", + "13." + ], + [ + "reference", + "reference-number", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 15441160910541481977, + 12462842527617278832, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "13", + "13." + ], + [ + "sentence", + "proper", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 2261840617824203371, + 3833037035800633943, + null, + null, + 4, + 73, + 4, + 73, + 2, + 17, + true, + "Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra.", + "Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra." + ], + [ + "reference", + "authors", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 4413158441497355977, + 888725642167870501, + null, + null, + 4, + 21, + 4, + 21, + 2, + 8, + true, + "Jeremy K, John G", + "Jeremy K, John G." + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 14652280730090715542, + 9368048166047908224, + null, + null, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Jeremy K", + "Jeremy K" + ], + [ + "name", + "person-name", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 4962934261580742358, + 3284808524522933032, + null, + null, + 14, + 27, + 14, + 27, + 5, + 9, + true, + "John G Graph", + "John G. Graph" + ], + [ + "reference", + "title", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 11539515714196318944, + 4409464707523225605, + null, + null, + 22, + 73, + 22, + 73, + 8, + 17, + true, + "Graph Algorithms in the Language of Linear Algebra", + "Graph Algorithms in the Language of Linear Algebra." + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 14079274028767783387, + 17595184631762760537, + null, + null, + 22, + 38, + 22, + 38, + 8, + 10, + true, + "Graph Algorithms", + "Graph Algorithms" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 14650462612952067914, + 15224301288684964806, + null, + null, + 46, + 54, + 46, + 54, + 12, + 13, + true, + "Language", + "Language" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 16513864209537702472, + 7141276361161445756, + null, + null, + 58, + 72, + 58, + 72, + 14, + 16, + true, + "Linear Algebra", + "Linear Algebra" + ], + [ + "sentence", + "proper", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 3918811354618692965, + 10240964037709860462, + null, + null, + 74, + 145, + 74, + 145, + 17, + 30, + true, + "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", + "Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011." + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 1813266722082342225, + 593931840598100395, + null, + null, + 74, + 86, + 74, + 86, + 17, + 18, + true, + "Philadelphia", + "Philadelphia" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 15441160910541487654, + 12462847826366847251, + null, + null, + 88, + 90, + 88, + 90, + 19, + 20, + true, + "PA", + "PA" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 8106352717733900272, + 18316158962653956918, + null, + null, + 92, + 99, + 92, + 99, + 21, + 22, + true, + "Society", + "Society" + ], + [ + "term", + "enum-term-mark-4", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 17988145802762076819, + 11569702800846552129, + null, + null, + 104, + 138, + 104, + 138, + 23, + 27, + true, + "Industrial and Applied Mathematics", + "Industrial and Applied Mathematics" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 7898186517875929489, + 6998199463972144020, + null, + null, + 104, + 114, + 104, + 114, + 23, + 24, + true, + "Industrial", + "Industrial" + ], + [ + "term", + "single-term", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 7685464491762532718, + 11454351202197972573, + null, + null, + 119, + 138, + 119, + 138, + 25, + 27, + true, + "Applied Mathematics", + "Applied Mathematics" + ], + [ + "reference", + "date", + 18196767266655606709, + "TEXT", + "#/texts/182", + 1.0, + 389609625548777063, + 12403401240882116541, + null, + null, + 140, + 145, + 140, + 145, + 28, + 30, + true, + "2011", + "2011." + ], + [ + "sentence", + "improper", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 12178341415896424137, + 2021336641528383539, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "14.", + "14." + ], + [ + "reference", + "reference-number", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541481978, + 9067685736347109847, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "14", + "14." + ], + [ + "sentence", + "proper", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 8027673259181526609, + 3901675832395310476, + null, + null, + 4, + 106, + 4, + 104, + 2, + 25, + true, + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015).", + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015)." + ], + [ + "reference", + "authors", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 11143603644967201081, + 10268584537510827373, + null, + null, + 4, + 98, + 4, + 96, + 2, + 21, + true, + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning", + "Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 3893756947393595038, + 15910484170600691612, + null, + null, + 4, + 17, + 4, + 17, + 2, + 4, + true, + "Kepner Jeremy", + "Kepner Jeremy" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 4638041857648041651, + 2139644705806385528, + null, + null, + 19, + 30, + 19, + 30, + 5, + 7, + true, + "Bader David", + "Bader David" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6559847563621387069, + 11479165544683600786, + null, + null, + 32, + 42, + 32, + 41, + 8, + 10, + true, + "Bulu\u00e7 Ayd", + "Bulu\u00e7 Ayd" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 978039607314331382, + 9008054255178396141, + null, + null, + 49, + 61, + 47, + 59, + 13, + 15, + true, + "Gilbert John", + "Gilbert John" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 10968707392751490476, + 11627993516556341660, + null, + null, + 63, + 78, + 61, + 76, + 16, + 18, + true, + "Mattson Timothy", + "Mattson Timothy" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 3010219124533777340, + 3552467627404320563, + null, + null, + 80, + 98, + 78, + 96, + 19, + 21, + true, + "Meyerhenke Henning", + "Meyerhenke Henning" + ], + [ + "reference", + "date", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 389609625548777059, + 3330964369910710952, + null, + null, + 99, + 106, + 97, + 104, + 21, + 25, + true, + "2015", + "(2015)." + ], + [ + "parenthesis", + "reference", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 16380808315360990702, + 11846655963247696776, + null, + null, + 99, + 105, + 97, + 103, + 21, + 24, + true, + "(2015)", + "(2015)" + ], + [ + "sentence", + "proper", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 17293964586930460261, + 12804061004186124881, + null, + null, + 107, + 163, + 105, + 161, + 25, + 37, + true, + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons.", + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons." + ], + [ + "reference", + "title", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 4447441827419394948, + 14102975208778644634, + null, + null, + 107, + 163, + 105, + 161, + 25, + 37, + true, + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons", + "Graphs, Matrices, and the GraphBLAS: Seven Good Reasons." + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 16380809986240833363, + 17968496728215965151, + null, + null, + 107, + 113, + 105, + 111, + 25, + 26, + true, + "Graphs", + "Graphs" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 14650311457598610046, + 1458725737752079201, + null, + null, + 115, + 123, + 113, + 121, + 27, + 28, + true, + "Matrices", + "Matrices" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6560668489345557302, + 13696029002884714705, + null, + null, + 133, + 142, + 131, + 140, + 31, + 32, + true, + "GraphBLAS", + "GraphBLAS" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 10585062274889693433, + 394824704429372117, + null, + null, + 144, + 162, + 142, + 160, + 33, + 36, + true, + "Seven Good Reasons", + "Seven Good Reasons" + ], + [ + "sentence", + "proper", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6573516791222902756, + 17147560721361502235, + null, + null, + 164, + 205, + 162, + 203, + 37, + 47, + true, + "Procedia Computer Science, 51, 2453-2462.", + "Procedia Computer Science, 51, 2453-2462." + ], + [ + "reference", + "journal", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 11311803343161413167, + 2833609951174621744, + null, + null, + 164, + 190, + 162, + 188, + 37, + 41, + true, + "Procedia Computer Science", + "Procedia Computer Science," + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 11311803343161413167, + 2833609951174621747, + null, + null, + 164, + 189, + 162, + 187, + 37, + 40, + true, + "Procedia Computer Science", + "Procedia Computer Science" + ], + [ + "reference", + "volume", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541486330, + 9067694506000682764, + null, + null, + 191, + 194, + 189, + 192, + 41, + 43, + true, + "51", + "51," + ], + [ + "reference", + "pages", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6573068860818606718, + 4687668980596472571, + null, + null, + 195, + 205, + 193, + 203, + 43, + 47, + true, + "2453-2462", + "2453-2462." + ], + [ + "expression", + "wtoken-concatenation", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 6573068860818606718, + 4687668980596472570, + null, + null, + 195, + 204, + 193, + 202, + 43, + 46, + true, + "2453-2462", + "2453-2462" + ], + [ + "sentence", + "improper", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 17722292403768798252, + 7166915790163671949, + null, + null, + 206, + 252, + 204, + 250, + 47, + 71, + true, + "http://dx.doi.org/10.1016/j.procs.2015.05.353.", + "http://dx.doi.org/10.1016/j.procs.2015.05.353." + ], + [ + "reference", + "doi", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 13624625778145690696, + 15445141723877014785, + null, + null, + 206, + 252, + 204, + 250, + 47, + 71, + true, + "http://dx.doi.org/10.1016/j.procs.2015.05.353", + "http://dx.doi.org/10.1016/j.procs.2015.05.353." + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 389609625695173007, + 3324915498141700280, + null, + null, + 206, + 210, + 204, + 208, + 47, + 48, + true, + "http", + "http" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541486860, + 9067695028646494582, + null, + null, + 213, + 215, + 211, + 213, + 51, + 52, + true, + "dx", + "dx" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 12178341415895623052, + 2021307257966447238, + null, + null, + 220, + 223, + 218, + 221, + 55, + 56, + true, + "org", + "org" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 15441160910541481522, + 9067685766522309508, + null, + null, + 231, + 233, + 229, + 231, + 60, + 62, + true, + "/j", + "/j" + ], + [ + "term", + "single-term", + 3623403683642367845, + "TEXT", + "#/texts/183", + 1.0, + 329104161588706802, + 14124897019871745005, + null, + null, + 234, + 239, + 232, + 237, + 63, + 64, + true, + "procs", + "procs" + ], + [ + "sentence", + "improper", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 12178341415896420618, + 3824456860028023899, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "15.", + "15." + ], + [ + "reference", + "reference-number", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 15441160910541481979, + 10213682970367471344, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "15", + "15." + ], + [ + "sentence", + "proper", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 16743274342806123059, + 16250199669292766293, + null, + null, + 4, + 94, + 4, + 94, + 2, + 20, + true, + "Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications.", + "Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications." + ], + [ + "reference", + "authors", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 15404759540282474341, + 7980371121466471931, + null, + null, + 4, + 28, + 4, + 28, + 2, + 9, + true, + "Aydin B, Gilbert John R", + "Aydin B, Gilbert John R." + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 8106396252822508385, + 7971302054101082514, + null, + null, + 4, + 11, + 4, + 11, + 2, + 4, + true, + "Aydin B", + "Aydin B" + ], + [ + "name", + "person-name", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 17208477177194249305, + 13847153851054112206, + null, + null, + 13, + 32, + 13, + 32, + 5, + 10, + true, + "Gilbert John R The", + "Gilbert John R. The" + ], + [ + "reference", + "title", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 6150328359964540652, + 10199114762007747144, + null, + null, + 29, + 94, + 29, + 94, + 9, + 20, + true, + "The combinatorial BLAS: design, implementation, and applications", + "The combinatorial BLAS: design, implementation, and applications." + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 11111529766026683653, + 13196650859027091171, + null, + null, + 33, + 51, + 33, + 51, + 10, + 12, + true, + "combinatorial BLAS", + "combinatorial BLAS" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 16381206568241679420, + 15760767362173066532, + null, + null, + 53, + 59, + 53, + 59, + 13, + 14, + true, + "design", + "design" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 16770038681622514616, + 12413351225926077106, + null, + null, + 61, + 75, + 61, + 75, + 15, + 16, + true, + "implementation", + "implementation" + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 546291010477001669, + 10618604754194727447, + null, + null, + 81, + 93, + 81, + 93, + 18, + 19, + true, + "applications", + "applications" + ], + [ + "sentence", + "proper", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 738491526082319197, + 7325738847681902359, + null, + null, + 95, + 126, + 95, + 126, + 20, + 27, + true, + "Int J High Perform Comput Appl.", + "Int J High Perform Comput Appl." + ], + [ + "reference", + "journal", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 15067288891537767501, + 3357793480659482143, + null, + null, + 95, + 126, + 95, + 126, + 20, + 27, + true, + "Int J High Perform Comput Appl", + "Int J High Perform Comput Appl." + ], + [ + "term", + "single-term", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 15067288891537767501, + 3357793480659482128, + null, + null, + 95, + 125, + 95, + 125, + 20, + 26, + true, + "Int J High Perform Comput Appl", + "Int J High Perform Comput Appl" + ], + [ + "sentence", + "proper", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 17282712032868423392, + 7113339629628212811, + null, + null, + 127, + 147, + 127, + 147, + 27, + 38, + true, + "2011;25 (4):496-509.", + "2011;25 (4):496-509." + ], + [ + "reference", + "date", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 11473506778099773410, + 15021630246282813280, + null, + null, + 127, + 147, + 127, + 147, + 27, + 38, + true, + "2011;25 (4):496-509", + "2011;25 (4):496-509." + ], + [ + "expression", + "wtoken-concatenation", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 8104407400321262254, + 3429534335477953780, + null, + null, + 127, + 134, + 127, + 134, + 27, + 30, + true, + "2011;25", + "2011;25" + ], + [ + "parenthesis", + "reference", + 13936866850854297069, + "TEXT", + "#/texts/184", + 1.0, + 12178341415896395057, + 3824454373173587092, + null, + null, + 135, + 138, + 135, + 138, + 30, + 33, + true, + "(4)", + "(4)" + ], + [ + "sentence", + "improper", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 12178341415896420683, + 15900700274059095170, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "16.", + "16." + ], + [ + "reference", + "reference-number", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 15441160910541481860, + 13099555958800192774, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "16", + "16." + ], + [ + "sentence", + "proper", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 11950444114006552808, + 9443399481099004568, + null, + null, + 4, + 87, + 4, + 87, + 2, + 21, + true, + "Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS.", + "Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS." + ], + [ + "reference", + "authors", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 17859568381876102831, + 13827450673521059842, + null, + null, + 4, + 44, + 4, + 44, + 2, + 15, + true, + "Jeremy K, Peter A, Bader David A, et al", + "Jeremy K, Peter A, Bader David A, et al." + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 14652280730090715542, + 12791881049692147803, + null, + null, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Jeremy K", + "Jeremy K" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 8106352035144611671, + 4513564816050590788, + null, + null, + 14, + 21, + 14, + 21, + 5, + 7, + true, + "Peter A", + "Peter A" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 11373457542276896833, + 10633744312666392907, + null, + null, + 23, + 36, + 23, + 36, + 8, + 11, + true, + "Bader David A", + "Bader David A" + ], + [ + "expression", + "common", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 329104162180805867, + 691614670836427228, + null, + null, + 38, + 44, + 38, + 44, + 12, + 15, + true, + "et al", + "et al." + ], + [ + "reference", + "title", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 11793767366674291400, + 16322176465659145653, + null, + null, + 45, + 103, + 45, + 103, + 15, + 25, + true, + "Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC", + "Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC." + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 8492799926004932084, + 1862233703664560925, + null, + null, + 45, + 69, + 45, + 69, + 15, + 17, + true, + "Mathematical foundations", + "Mathematical foundations" + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 6560668489345557302, + 18401509678758935464, + null, + null, + 77, + 86, + 77, + 86, + 19, + 20, + true, + "GraphBLAS", + "GraphBLAS" + ], + [ + "sentence", + "improper", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 389609625548777056, + 8567475520614412130, + null, + null, + 88, + 92, + 88, + 92, + 21, + 22, + true, + "2016", + "2016" + ], + [ + "sentence", + "proper", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 515474695412696961, + 6296343322569991622, + null, + null, + 93, + 103, + 93, + 103, + 22, + 25, + true, + "IEEE HPEC.", + "IEEE HPEC." + ], + [ + "term", + "single-term", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 6560769162974074266, + 16780287060117651276, + null, + null, + 93, + 102, + 93, + 102, + 22, + 24, + true, + "IEEE HPEC", + "IEEE HPEC" + ], + [ + "sentence", + "improper", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 12668400427997832797, + 10477465110317917500, + null, + null, + 104, + 114, + 104, + 114, + 25, + 31, + true, + "2016; 1-9.", + "2016; 1-9." + ], + [ + "reference", + "date", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 329104147695661831, + 5482404028284083905, + null, + null, + 104, + 109, + 104, + 109, + 25, + 27, + true, + "2016;", + "2016;" + ], + [ + "reference", + "pages", + 8497015665124263236, + "TEXT", + "#/texts/185", + 1.0, + 12178341415896427413, + 15900868514674279592, + null, + null, + 110, + 114, + 110, + 114, + 27, + 31, + true, + "1-9", + "1-9." + ], + [ + "sentence", + "improper", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 12178341415896424331, + 1785950286755592566, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "17.", + "17." + ], + [ + "reference", + "reference-number", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 15441160910541481861, + 5749903657566610071, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "17", + "17." + ], + [ + "sentence", + "proper", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8625319757665987765, + 8201554304424324610, + null, + null, + 4, + 46, + 4, + 46, + 2, + 15, + true, + "Ariful A, Mathias J, Aydin B, Ng Esmond G.", + "Ariful A, Mathias J, Aydin B, Ng Esmond G." + ], + [ + "reference", + "authors", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 10218631809067229551, + 3045627595466121013, + null, + null, + 4, + 46, + 4, + 46, + 2, + 15, + true, + "Ariful A, Mathias J, Aydin B, Ng Esmond G", + "Ariful A, Mathias J, Aydin B, Ng Esmond G." + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 14650296444613217893, + 2015187192231796797, + null, + null, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Ariful A", + "Ariful A" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 6611311853662317003, + 219996680584521934, + null, + null, + 14, + 23, + 14, + 23, + 5, + 7, + true, + "Mathias J", + "Mathias J" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8106396252822508385, + 5214697480984905265, + null, + null, + 25, + 32, + 25, + 32, + 8, + 10, + true, + "Aydin B", + "Aydin B" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 7695963223911460273, + 9674237366617659132, + null, + null, + 34, + 45, + 34, + 45, + 11, + 14, + true, + "Ng Esmond G", + "Ng Esmond G" + ], + [ + "sentence", + "proper", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 12182767863167085705, + 5974957861280769301, + null, + null, + 47, + 105, + 47, + 105, + 15, + 26, + true, + "The reverse Cuthill-McKee algorithm in distributed-memory.", + "The reverse Cuthill-McKee algorithm in distributed-memory." + ], + [ + "reference", + "title", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 18143113072209505450, + 5317689214231344369, + null, + null, + 47, + 105, + 47, + 105, + 15, + 26, + true, + "The reverse Cuthill-McKee algorithm in distributed-memory", + "The reverse Cuthill-McKee algorithm in distributed-memory." + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8743700494712196097, + 1542666924841238621, + null, + null, + 51, + 66, + 51, + 66, + 16, + 18, + true, + "reverse Cuthill", + "reverse Cuthill" + ], + [ + "expression", + "word-concatenation", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 17823074998039859280, + 107872343608001032, + null, + null, + 59, + 72, + 59, + 72, + 17, + 20, + true, + "Cuthill-McKee", + "Cuthill-McKee" + ], + [ + "name", + "name-concatenation", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 961990462724452746, + 8774024725617322003, + null, + null, + 59, + 69, + 59, + 69, + 17, + 19, + true, + "Cuthill-Mc", + "Cuthill-Mc" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 396862896536119578, + 8212700969941978582, + null, + null, + 67, + 82, + 67, + 82, + 19, + 21, + true, + "McKee algorithm", + "McKee algorithm" + ], + [ + "expression", + "word-concatenation", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 4632629274084093489, + 13294623612112829573, + null, + null, + 86, + 104, + 86, + 104, + 22, + 25, + true, + "distributed-memory", + "distributed-memory" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 16381206567042997791, + 13949121518387210194, + null, + null, + 98, + 104, + 98, + 104, + 24, + 25, + true, + "memory", + "memory" + ], + [ + "reference", + "conference", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 5977126754161531620, + 8369992873906444297, + null, + null, + 106, + 197, + 106, + 197, + 26, + 44, + true, + "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31", + "2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31." + ], + [ + "sentence", + "improper", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 389609625548777057, + 8314107736373646335, + null, + null, + 106, + 110, + 106, + 110, + 26, + 27, + true, + "2017", + "2017" + ], + [ + "sentence", + "proper", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 10555308991053583656, + 8625840606506711403, + null, + null, + 111, + 184, + 111, + 184, + 27, + 38, + true, + "IEEE International Parallel and Distributed Processing Symposium (IPDPS).", + "IEEE International Parallel and Distributed Processing Symposium (IPDPS)." + ], + [ + "term", + "enum-term-mark-4", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8480286396580246383, + 10467027636114534702, + null, + null, + 111, + 175, + 111, + 175, + 27, + 34, + true, + "IEEE International Parallel and Distributed Processing Symposium", + "IEEE International Parallel and Distributed Processing Symposium" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8242037745725614235, + 1744334934210275218, + null, + null, + 111, + 138, + 111, + 138, + 27, + 30, + true, + "IEEE International Parallel", + "IEEE International Parallel" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 1179067127764944952, + 11104445064314775390, + null, + null, + 143, + 175, + 143, + 175, + 31, + 34, + true, + "Distributed Processing Symposium", + "Distributed Processing Symposium" + ], + [ + "parenthesis", + "round brackets", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 8106341251871154495, + 14756599361695942733, + null, + null, + 176, + 183, + 176, + 183, + 34, + 37, + true, + "(IPDPS)", + "(IPDPS)" + ], + [ + "term", + "single-term", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 329104161866629985, + 4498077561104002021, + null, + null, + 177, + 182, + 177, + 182, + 35, + 36, + true, + "IPDPS", + "IPDPS" + ], + [ + "sentence", + "improper", + 15947529491299956047, + "TEXT", + "#/texts/186", + 1.0, + 15668671505312224859, + 7267236904131898531, + null, + null, + 185, + 197, + 185, + 197, + 38, + 44, + true, + "2017: 22-31.", + "2017: 22-31." + ], + [ + "sentence", + "improper", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 12178341415896424394, + 9464187724344101613, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "18.", + "18." + ], + [ + "reference", + "reference-number", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 15441160910541481862, + 17618650105274567066, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "18", + "18." + ], + [ + "sentence", + "proper", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 8633051299923742554, + 9812031180740342815, + null, + null, + 4, + 87, + 4, + 87, + 2, + 19, + true, + "Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices.", + "Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices." + ], + [ + "reference", + "authors", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 9985406748938595316, + 16734523975477127612, + null, + null, + 4, + 37, + 4, + 37, + 2, + 11, + true, + "Rukhsana S, Anila U, Chughtai IR", + "Rukhsana S, Anila U, Chughtai IR." + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 9277063416399937233, + 9921862040524615824, + null, + null, + 4, + 14, + 4, + 14, + 2, + 4, + true, + "Rukhsana S", + "Rukhsana S" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 8106479273814684994, + 12770854321018137055, + null, + null, + 16, + 23, + 16, + 23, + 5, + 7, + true, + "Anila U", + "Anila U" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 4371320678784428525, + 15222832476664208124, + null, + null, + 25, + 36, + 25, + 36, + 8, + 10, + true, + "Chughtai IR", + "Chughtai IR" + ], + [ + "name", + "person-name", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 14652303699240355001, + 1172671267071592161, + null, + null, + 35, + 44, + 35, + 44, + 9, + 12, + true, + "R Review", + "R. Review" + ], + [ + "reference", + "title", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 5583013427504923325, + 8891716095058217669, + null, + null, + 38, + 120, + 38, + 120, + 11, + 23, + true, + "Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic", + "Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 5298571882490963181, + 13490463183486071840, + null, + null, + 48, + 66, + 48, + 66, + 13, + 15, + true, + "storage techniques", + "storage techniques" + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 5038915387230346489, + 6702839604458857240, + null, + null, + 71, + 86, + 71, + 86, + 16, + 18, + true, + "sparse matrices", + "sparse matrices" + ], + [ + "sentence", + "improper", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 389609625548757410, + 18165604049296771030, + null, + null, + 88, + 92, + 88, + 92, + 19, + 20, + true, + "2005", + "2005" + ], + [ + "sentence", + "proper", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 14938776978172003836, + 10713320247466750625, + null, + null, + 93, + 132, + 93, + 132, + 20, + 25, + true, + "Pakistan Section Multitopic Conference.", + "Pakistan Section Multitopic Conference." + ], + [ + "term", + "single-term", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 1320248361117940781, + 5199561905441189481, + null, + null, + 93, + 131, + 93, + 131, + 20, + 24, + true, + "Pakistan Section Multitopic Conference", + "Pakistan Section Multitopic Conference" + ], + [ + "reference", + "conference", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 4373101011741787076, + 6434853878367657275, + null, + null, + 121, + 137, + 121, + 137, + 23, + 26, + true, + "Conference. 2005", + "Conference. 2005" + ], + [ + "sentence", + "improper", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 6573469177968412116, + 6998677959073478193, + null, + null, + 133, + 142, + 133, + 142, + 25, + 30, + true, + "2005 1-7.", + "2005 1-7." + ], + [ + "reference", + "pages", + 14843401725435831033, + "TEXT", + "#/texts/187", + 1.0, + 12178341415896427411, + 9464229838695116070, + null, + null, + 138, + 142, + 138, + 142, + 26, + 30, + true, + "1-7", + "1-7." + ], + [ + "sentence", + "improper", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 12178341415896423945, + 1346293265340748508, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "19.", + "19." + ], + [ + "reference", + "reference-number", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 15441160910541481863, + 8099163979199984839, + null, + null, + 0, + 3, + 0, + 3, + 0, + 2, + true, + "19", + "19." + ], + [ + "sentence", + "proper", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1083095538878710059, + 6410519906814031679, + null, + null, + 4, + 176, + 4, + 176, + 2, + 36, + true, + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997." + ], + [ + "reference", + "authors", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 4102400299870176607, + 16168638938102127468, + null, + null, + 4, + 56, + 4, + 56, + 2, + 14, + true, + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin", + "Welte DH, Horsfield B, Baker DR. Petroleum and Basin" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 14638563242508500832, + 2752940376292253295, + null, + null, + 4, + 12, + 4, + 12, + 2, + 4, + true, + "Welte DH", + "Welte DH" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1317380608127935415, + 8792991722627090893, + null, + null, + 14, + 25, + 14, + 25, + 5, + 7, + true, + "Horsfield B", + "Horsfield B" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 14650425433297857126, + 17200611816160356686, + null, + null, + 27, + 35, + 27, + 35, + 8, + 10, + true, + "Baker DR", + "Baker DR" + ], + [ + "name", + "person-name", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 9811818043271335175, + 5388942193352320893, + null, + null, + 34, + 46, + 34, + 46, + 9, + 12, + true, + "R Petroleum", + "R. Petroleum" + ], + [ + "term", + "enum-term-mark-4", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 10939118393609776387, + 16464700551363827911, + null, + null, + 37, + 66, + 37, + 66, + 11, + 15, + true, + "Petroleum and Basin Evolution", + "Petroleum and Basin Evolution" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1538397892452668306, + 4387773196933243696, + null, + null, + 51, + 66, + 51, + 66, + 13, + 15, + true, + "Basin Evolution", + "Basin Evolution" + ], + [ + "reference", + "title", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 4264009440714515689, + 4786526718705436187, + null, + null, + 57, + 134, + 57, + 134, + 14, + 27, + true, + "Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling", + "Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling," + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 14652305210070084086, + 12917423391664842605, + null, + null, + 68, + 76, + 68, + 76, + 16, + 17, + true, + "Insights", + "Insights" + ], + [ + "term", + "enum-term-mark-4", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 17006005703909820457, + 12901071012590413163, + null, + null, + 82, + 133, + 82, + 133, + 18, + 26, + true, + "Petroleum Geochemistry, Geology, and Basin Modeling", + "Petroleum Geochemistry, Geology, and Basin Modeling" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 6297994706585107052, + 15599666871412118732, + null, + null, + 82, + 104, + 82, + 104, + 18, + 20, + true, + "Petroleum Geochemistry", + "Petroleum Geochemistry" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 8106351569626681077, + 12181079068816099642, + null, + null, + 106, + 113, + 106, + 113, + 21, + 22, + true, + "Geology", + "Geology" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 18229361067854714750, + 1667565054890986990, + null, + null, + 119, + 133, + 119, + 133, + 24, + 26, + true, + "Basin Modeling", + "Basin Modeling" + ], + [ + "term", + "single-term", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 1961730974653605813, + 770501763529322377, + null, + null, + 135, + 152, + 135, + 152, + 27, + 29, + true, + "Berlin Heidelberg", + "Berlin Heidelberg" + ], + [ + "name", + "name-concatenation", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 3197612152806046883, + 2512966040017790311, + null, + null, + 154, + 169, + 154, + 169, + 30, + 33, + true, + "Springer-Verlag", + "Springer-Verlag" + ], + [ + "reference", + "date", + 16676439669743530711, + "TEXT", + "#/texts/188", + 1.0, + 389609625536085743, + 8456122008713527720, + null, + null, + 171, + 176, + 171, + 176, + 34, + 36, + true, + "1997", + "1997." + ], + [ + "sentence", + "proper", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 3399237007757536794, + 10798111996929910377, + null, + null, + 0, + 152, + 0, + 152, + 0, + 31, + true, + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora.", + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora." + ], + [ + "reference", + "title", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 18273937239822213328, + 9383076520753321936, + null, + null, + 0, + 152, + 0, + 152, + 0, + 31, + true, + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", + "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora." + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106397798288310212, + 6892140235696191542, + null, + null, + 17, + 24, + 17, + 24, + 4, + 5, + true, + "article", + "article" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 6052191155307735802, + 18032874033977513490, + null, + null, + 26, + 35, + 26, + 35, + 6, + 8, + true, + "Staar PWJ", + "Staar PWJ" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106351306870445011, + 7231860053894851093, + null, + null, + 37, + 44, + 37, + 44, + 9, + 11, + true, + "Dolfi M", + "Dolfi M" + ], + [ + "name", + "person-name", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 13763699434920414504, + 14310942059015767454, + null, + null, + 46, + 60, + 46, + 60, + 12, + 16, + true, + "Auer C Corpus", + "Auer C. Corpus" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 1821123588367592853, + 13143602266977617422, + null, + null, + 54, + 79, + 54, + 79, + 15, + 18, + true, + "Corpus processing service", + "Corpus processing service" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12981440865159980116, + 8397818236619725491, + null, + null, + 83, + 107, + 83, + 107, + 20, + 23, + true, + "Knowledge Graph platform", + "Knowledge Graph platform" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 13671659409933113155, + 12446642666303205360, + null, + null, + 119, + 140, + 119, + 140, + 25, + 28, + true, + "deep data exploration", + "deep data exploration" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 8106398483106473371, + 4135599828090019002, + null, + null, + 144, + 151, + 144, + 151, + 29, + 30, + true, + "corpora", + "corpora" + ], + [ + "sentence", + "proper", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 9017840063542546137, + 3221707506812699045, + null, + null, + 153, + 172, + 153, + 172, + 31, + 35, + true, + "Applied AI Letters.", + "Applied AI Letters." + ], + [ + "reference", + "journal", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 10525943314116263182, + 11312474291607917610, + null, + null, + 153, + 172, + 153, + 172, + 31, + 35, + true, + "Applied AI Letters", + "Applied AI Letters." + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 10525943314116263182, + 11312474291607917611, + null, + null, + 153, + 171, + 153, + 171, + 31, + 34, + true, + "Applied AI Letters", + "Applied AI Letters" + ], + [ + "sentence", + "proper", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 7365754457409807236, + 16458707549922411068, + null, + null, + 173, + 184, + 173, + 184, + 35, + 42, + true, + "2020;1:e20.", + "2020;1:e20." + ], + [ + "reference", + "date", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12668563530344603848, + 14820206483220239470, + null, + null, + 173, + 184, + 173, + 184, + 35, + 42, + true, + "2020;1:e20", + "2020;1:e20." + ], + [ + "expression", + "wtoken-concatenation", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12668563530344603848, + 14820206483220239473, + null, + null, + 173, + 183, + 173, + 183, + 35, + 41, + true, + "2020;1:e20", + "2020;1:e20" + ], + [ + "sentence", + "improper", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 751450063096904044, + 2161551171101074414, + null, + null, + 185, + 216, + 185, + 216, + 42, + 58, + true, + "https://doi.org/10.1002/ail2.20", + "https://doi.org/10.1002/ail2.20" + ], + [ + "reference", + "url", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 751450063096904044, + 2161551171101074414, + null, + null, + 185, + 216, + 185, + 216, + 42, + 58, + true, + "https://doi.org/10.1002/ail2.20", + "https://doi.org/10.1002/ail2.20" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 329104161533497127, + 15533708503938485693, + null, + null, + 185, + 190, + 185, + 190, + 42, + 43, + true, + "https", + "https" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12178341415895452239, + 2509341829612471905, + null, + null, + 193, + 196, + 193, + 196, + 46, + 47, + true, + "doi", + "doi" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 12178341415895623052, + 2509292496994469077, + null, + null, + 197, + 200, + 197, + 200, + 48, + 49, + true, + "org", + "org" + ], + [ + "term", + "single-term", + 2986547206451163051, + "TEXT", + "#/texts/189", + 1.0, + 389609625537446556, + 7737228572826305234, + null, + null, + 208, + 212, + 208, + 212, + 53, + 55, + true, + "/ail", + "/ail" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 14654386914267794441, + 12796143052106760105, + null, + null, + 0, + 8, + 0, + 8, + 0, + 1, + true, + "26895595", + "26895595" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10996423793555931038, + 10004407305162661320, + null, + null, + 8, + 18, + 8, + 18, + 1, + 6, + true, + ", 2020, 2,", + ", 2020, 2," + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 389609625548777262, + 8826555294676663632, + null, + null, + 10, + 14, + 10, + 14, + 2, + 3, + true, + "2020", + "2020" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 17767354399704235162, + 7753390158484899261, + null, + null, + 16, + 17, + 16, + 17, + 4, + 5, + true, + "2", + "2" + ], + [ + "sentence", + "proper", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10933383461306782608, + 10178418358179275356, + null, + null, + 19, + 125, + 19, + 125, + 6, + 41, + true, + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023].", + "Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]." + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 8536069645534292969, + 16063604623463467342, + null, + null, + 35, + 87, + 35, + 87, + 8, + 29, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20,", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20," + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 3856967589249015473, + 3576147774941915841, + null, + null, + 35, + 86, + 35, + 86, + 8, + 28, + true, + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20", + "https://onlinelibrary.wiley.com/doi/10.1002/ail2.20" + ], + [ + "link", + "doi", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 1697220653346092555, + 8458710314769009562, + null, + null, + 67, + 87, + 67, + 87, + 18, + 29, + true, + "doi/10.1002/ail2.20,", + "doi/10.1002/ail2.20," + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 8104408072666212335, + 13552219042525319352, + null, + null, + 71, + 78, + 71, + 78, + 20, + 23, + true, + "10.1002", + "10.1002" + ], + [ + "numval", + "fval", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 389609625548868096, + 8826558551385119058, + null, + null, + 82, + 86, + 82, + 86, + 25, + 28, + true, + "2.20", + "2.20" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12466457873768409517, + 3430070082404029638, + null, + null, + 88, + 108, + 88, + 108, + 29, + 32, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "parenthesis", + "square brackets", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "expression", + "wtoken-concatenation", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15691754593896323724, + 15433429984583237828, + null, + null, + 112, + 124, + 112, + 124, + 33, + 40, + true, + "[23/08/2023]", + "[23/08/2023]" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15441160910541481791, + 3518619573290839093, + null, + null, + 113, + 115, + 113, + 115, + 34, + 35, + true, + "23", + "23" + ], + [ + "numval", + "ival", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 15441160910541481543, + 3518617976696906498, + null, + null, + 116, + 118, + 116, + 118, + 36, + 37, + true, + "08", + "08" + ], + [ + "numval", + "year", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 389609625548777251, + 8826555296349648778, + null, + null, + 119, + 123, + 119, + 123, + 38, + 39, + true, + "2023", + "2023" + ], + [ + "sentence", + "improper", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10588328148713066663, + 14496609285345956363, + null, + null, + 126, + 319, + 126, + 319, + 41, + 82, + true, + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", + "See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 329104161846385964, + 16017248647642597247, + null, + null, + 134, + 139, + 134, + 139, + 43, + 44, + true, + "Terms", + "Terms" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 969969168017005656, + 2961182532179915323, + null, + null, + 144, + 154, + 144, + 154, + 45, + 46, + true, + "Conditions", + "Conditions" + ], + [ + "parenthesis", + "round brackets", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12213187056216195918, + 14309760985361468471, + null, + null, + 155, + 209, + 155, + 209, + 46, + 63, + true, + "(https://onlinelibrary.wiley.com/terms-and-conditions)", + "(https://onlinelibrary.wiley.com/terms-and-conditions)" + ], + [ + "link", + "url", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 594099663775968682, + 14698211805947073928, + null, + null, + 156, + 208, + 156, + 208, + 47, + 62, + true, + "https://onlinelibrary.wiley.com/terms-and-conditions", + "https://onlinelibrary.wiley.com/terms-and-conditions" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12466457873768409517, + 3430070082403846184, + null, + null, + 213, + 233, + 213, + 233, + 64, + 67, + true, + "Wiley Online Library", + "Wiley Online Library" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 329104161825278214, + 16021621362593374209, + null, + null, + 238, + 243, + 238, + 243, + 68, + 69, + true, + "rules", + "rules" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 12178341415895516060, + 12061595171928625555, + null, + null, + 247, + 250, + 247, + 250, + 70, + 71, + true, + "use", + "use" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 10086796047802705645, + 11637015082128438412, + null, + null, + 252, + 263, + 252, + 263, + 72, + 74, + true, + "OA articles", + "OA articles" + ], + [ + "term", + "single-term", + 18391264192891079539, + "TEXT", + "#/texts/190", + 1.0, + 6687370681685741393, + 17939310132506951168, + null, + null, + 284, + 319, + 284, + 319, + 78, + 82, + true, + "applicable Creative Commons License", + "applicable Creative Commons License" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482672, + 3558959168916500461, + 0, + 2, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-1", + "-1" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482673, + 3558959168967845780, + 0, + 3, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-2", + "-2" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482674, + 3558959169084991311, + 0, + 4, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-3", + "-3" + ], + [ + "numval", + "ival", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 15441160910541482676, + 3558959170275494348, + 0, + 5, + 3, + 5, + 3, + 5, + 1, + 3, + true, + "-5", + "-5" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995426, + 7990768689708475978, + 1, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.82", + "0.82" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618103388257, + 1, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.96", + "0.96" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774615713296517, + 1, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625536250803, + 7990774066976884381, + 1, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "1.00", + "1.00" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995622, + 7990774618160743993, + 2, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.93", + "0.93" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774615712524481, + 2, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625536250803, + 7990774066976098009, + 2, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "1.00", + "1.00" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625536250803, + 7990774066976110280, + 2, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "1.00", + "1.00" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995293, + 7990774599790700074, + 3, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.62", + "0.62" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995424, + 7990768689730984037, + 3, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.80", + "0.80" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995433, + 7990768688117646262, + 3, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.87", + "0.87" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995623, + 7990774617730131452, + 3, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.94", + "0.94" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995492, + 7990768692352137559, + 4, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.73", + "0.73" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995616, + 7990774618481181961, + 4, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.91", + "0.91" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995623, + 7990774617741217753, + 4, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.94", + "0.94" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612563908250, + 4, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995426, + 7990768689764177354, + 5, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.82", + "0.82" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995623, + 7990774617746212517, + 5, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.94", + "0.94" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612589838230, + 5, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774616182657591, + 5, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995426, + 7990768689764403839, + 6, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.82", + "0.82" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995617, + 7990774618567229989, + 6, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.92", + "0.92" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995620, + 7990774618125993935, + 6, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.95", + "0.95" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612590090226, + 6, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995494, + 7990768689217789732, + 7, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.75", + "0.75" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995617, + 7990774619359159209, + 7, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.92", + "0.92" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618108893234, + 7, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.96", + "0.96" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612570894765, + 7, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995435, + 7990774626011945031, + 8, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.89", + "0.89" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618110730915, + 8, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.96", + "0.96" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995626, + 7990774612562839849, + 8, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.97", + "0.97" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995627, + 7990774616172489304, + 8, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.98", + "0.98" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995429, + 7990774613602439211, + 9, + 2, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.83", + "0.83" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995617, + 7990774619353439571, + 9, + 3, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.92", + "0.92" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995620, + 7990774618123099565, + 9, + 4, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.95", + "0.95" + ], + [ + "numval", + "fval", + 12469893451248582632, + "TABLE", + "#/tables/0", + 1.0, + 389609625535995621, + 7990774618110462820, + 9, + 5, + 0, + 4, + 0, + 4, + 0, + 3, + true, + "0.96", + "0.96" + ] + ], + "headers": [ + "type", + "subtype", + "subj_hash", + "subj_name", + "subj_path", + "conf", + "hash", + "ihash", + "coor_i", + "coor_j", + "char_i", + "char_j", + "ctok_i", + "ctok_j", + "wtok_i", + "wtok_j", + "wtok-match", + "name", + "original" + ] + }, + "meta": [ + { + "$ref": "#/page-headers/0" + }, + { + "$ref": "#/page-headers/1" + }, + { + "$ref": "#/page-headers/2" + }, + { + "$ref": "#/page-headers/3" + }, + { + "$ref": "#/footnotes/0" + }, + { + "$ref": "#/footnotes/1" + }, + { + "$ref": "#/page-footers/0" + }, + { + "$ref": "#/page-footers/1" + }, + { + "$ref": "#/page-headers/4" + }, + { + "$ref": "#/page-headers/5" + }, + { + "$ref": "#/figures/0/captions/0" + }, + { + "$ref": "#/page-headers/6" + }, + { + "$ref": "#/page-headers/7" + }, + { + "$ref": "#/page-headers/8" + }, + { + "$ref": "#/page-headers/9" + }, + { + "$ref": "#/page-headers/10" + }, + { + "$ref": "#/page-headers/11" + }, + { + "$ref": "#/figures/2/captions/0" + }, + { + "$ref": "#/page-headers/12" + }, + { + "$ref": "#/figures/3/captions/0" + }, + { + "$ref": "#/page-headers/13" + }, + { + "$ref": "#/page-headers/14" + }, + { + "$ref": "#/figures/5/captions/0" + }, + { + "$ref": "#/page-headers/15" + }, + { + "$ref": "#/figures/6/captions/0" + }, + { + "$ref": "#/page-headers/16" + }, + { + "$ref": "#/tables/0/captions/0" + }, + { + "$ref": "#/page-headers/17" + }, + { + "$ref": "#/page-headers/18" + } + ], + "model-application": { + "message": "success", + "success": true + }, + "other": [], + "page-dimensions": [ + { + "height": 782.36, + "page": 1, + "width": 595.28 + }, + { + "height": 782.36, + "page": 2, + "width": 595.28 + }, + { + "height": 782.36, + "page": 3, + "width": 595.28 + }, + { + "height": 782.36, + "page": 4, + "width": 595.28 + }, + { + "height": 782.36, + "page": 5, + "width": 595.28 + }, + { + "height": 782.36, + "page": 6, + "width": 595.28 + }, + { + "height": 782.36, + "page": 7, + "width": 595.28 + }, + { + "height": 782.36, + "page": 8, + "width": 595.28 + }, + { + "height": 782.36, + "page": 9, + "width": 595.28 + }, + { + "height": 782.36, + "page": 10, + "width": 595.28 + }, + { + "height": 782.36, + "page": 11, + "width": 595.28 + }, + { + "height": 782.36, + "page": 12, + "width": 595.28 + }, + { + "height": 782.36, + "page": 13, + "width": 595.28 + }, + { + "height": 782.36, + "page": 14, + "width": 595.28 + }, + { + "height": 782.36, + "page": 15, + "width": 595.28 + } + ], + "page-elements": [ + { + "bbox": [ + 44.79, + 743.58, + 131.78, + 750.79 + ], + "iref": "#/page-headers/0", + "name": "page-header", + "orig-order": 15, + "page": 1, + "span": [ + 0, + 28 + ], + "sref": "#/page-elements/0", + "text-order": 0, + "type": "page-header" + }, + { + "bbox": [ + 146.33, + 744.09, + 229.31, + 751.44 + ], + "iref": "#/page-headers/1", + "name": "page-header", + "orig-order": 16, + "page": 1, + "span": [ + 0, + 26 + ], + "sref": "#/page-elements/1", + "text-order": 1, + "type": "page-header" + }, + { + "bbox": [ + 243.78, + 743.95, + 332.99, + 751.35 + ], + "iref": "#/page-headers/2", + "name": "page-header", + "orig-order": 17, + "page": 1, + "span": [ + 0, + 27 + ], + "sref": "#/page-elements/2", + "text-order": 2, + "type": "page-header" + }, + { + "bbox": [ + 44.69, + 730.71, + 106.12, + 737.3 + ], + "iref": "#/page-headers/3", + "name": "page-header", "orig-order": 18, "page": 1, "span": [ 0, - 21 + 21 + ], + "sref": "#/page-elements/3", + "text-order": 3, + "type": "page-header" + }, + { + "bbox": [ + 43.96, + 702.4, + 91.95, + 712.1 + ], + "iref": "#/texts/0", + "name": "subtitle-level-1", + "orig-order": 0, + "page": 1, + "span": [ + 0, + 6 + ], + "sref": "#/page-elements/4", + "text-order": 4, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.71, + 631.27, + 520.77, + 672.01 + ], + "iref": "#/texts/1", + "name": "subtitle-level-1", + "orig-order": 1, + "page": 1, + "span": [ + 0, + 97 + ], + "sref": "#/page-elements/5", + "text-order": 5, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 593.61, + 146.47, + 606.47 + ], + "iref": "#/texts/2", + "name": "subtitle-level-1", + "orig-order": 2, + "page": 1, + "span": [ + 0, + 17 + ], + "sref": "#/page-elements/6", + "text-order": 6, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 160.1, + 593.72, + 163.59, + 605.11 + ], + "iref": "#/texts/3", + "name": "text", + "orig-order": 3, + "page": 1, + "span": [ + 0, + 1 + ], + "sref": "#/page-elements/7", + "text-order": 7, + "type": "paragraph" + }, + { + "bbox": [ + 170.39, + 593.44, + 265.12, + 607.21 + ], + "iref": "#/texts/4", + "name": "subtitle-level-1", + "orig-order": 4, + "page": 1, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/8", + "text-order": 8, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 274.56, + 593.72, + 278.06, + 605.11 + ], + "iref": "#/texts/5", + "name": "text", + "orig-order": 5, + "page": 1, + "span": [ + 0, + 1 + ], + "sref": "#/page-elements/9", + "text-order": 9, + "type": "paragraph" + }, + { + "bbox": [ + 290.04, + 593.26, + 387.63, + 606.96 + ], + "iref": "#/texts/6", + "name": "text", + "orig-order": 6, + "page": 1, + "span": [ + 0, + 14 + ], + "sref": "#/page-elements/10", + "text-order": 10, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 559.6, + 182.68, + 567.3 + ], + "iref": "#/texts/7", + "name": "text", + "orig-order": 7, + "page": 1, + "span": [ + 0, + 38 + ], + "sref": "#/page-elements/11", + "text-order": 11, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 493.49, + 164.66, + 545.31 + ], + "iref": "#/texts/8", + "name": "text", + "orig-order": 8, + "page": 1, + "span": [ + 0, + 121 + ], + "sref": "#/page-elements/12", + "text-order": 12, + "type": "paragraph" + }, + { + "bbox": [ + 209.19, + 552.25, + 249.13, + 561.74 + ], + "iref": "#/texts/9", + "name": "subtitle-level-1", + "orig-order": 9, + "page": 1, + "span": [ + 0, + 8 + ], + "sref": "#/page-elements/13", + "text-order": 13, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 208.61, + 251.59, + 543.86, + 547.04 + ], + "iref": "#/texts/10", + "name": "text", + "orig-order": 10, + "page": 1, + "span": [ + 0, + 1624 + ], + "sref": "#/page-elements/14", + "text-order": 14, + "type": "paragraph" + }, + { + "bbox": [ + 209.21, + 228.2, + 269.01, + 237.28 + ], + "iref": "#/texts/11", + "name": "subtitle-level-1", + "orig-order": 11, + "page": 1, + "span": [ + 0, + 8 + ], + "sref": "#/page-elements/15", + "text-order": 15, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 208.8, + 214.08, + 401.03, + 222.97 + ], + "iref": "#/texts/12", + "name": "text", + "orig-order": 12, + "page": 1, + "span": [ + 0, + 53 + ], + "sref": "#/page-elements/16", + "text-order": 16, + "type": "paragraph" + }, + { + "bbox": [ + 44.28, + 187.52, + 189.72, + 199.66 + ], + "iref": "#/texts/13", + "name": "subtitle-level-1", + "orig-order": 13, + "page": 1, + "span": [ + 0, + 16 + ], + "sref": "#/page-elements/17", + "text-order": 17, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 96.98, + 552.65, + 172.33 + ], + "iref": "#/texts/14", + "name": "text", + "orig-order": 14, + "page": 1, + "span": [ + 0, + 639 + ], + "sref": "#/page-elements/18", + "text-order": 18, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 52.5, + 540.7, + 70.33 + ], + "iref": "#/footnotes/0", + "name": "footnote", + "orig-order": 19, + "page": 1, + "span": [ + 0, + 201 + ], + "sref": "#/page-elements/19", + "text-order": 19, + "type": "footnote" + }, + { + "bbox": [ + 44.79, + 42.45, + 272.17, + 50.21 + ], + "iref": "#/footnotes/1", + "name": "footnote", + "orig-order": 20, + "page": 1, + "span": [ + 0, + 75 + ], + "sref": "#/page-elements/20", + "text-order": 20, + "type": "footnote" + }, + { + "bbox": [ + 44.38, + 12.3, + 135.59, + 30.87 + ], + "iref": "#/page-footers/0", + "name": "page-footer", + "orig-order": 21, + "page": 1, + "span": [ + 0, + 64 + ], + "sref": "#/page-elements/21", + "text-order": 21, + "type": "page-footer" + }, + { + "bbox": [ + 400.53, + 22.28, + 550.62, + 29.7 + ], + "iref": "#/page-footers/1", + "name": "page-footer", + "orig-order": 22, + "page": 1, + "span": [ + 0, + 42 + ], + "sref": "#/page-elements/22", + "text-order": 22, + "type": "page-footer" + }, + { + "bbox": [ + 46.49, + 751.41, + 68.56, + 758.05 + ], + "iref": "#/texts/15", + "name": "text", + "orig-order": 40, + "page": 2, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/23", + "text-order": 23, + "type": "paragraph" + }, + { + "bbox": [ + 510.63, + 751.46, + 550.96, + 758.33 + ], + "iref": "#/page-headers/4", + "name": "page-header", + "orig-order": 41, + "page": 2, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/24", + "text-order": 24, + "type": "page-header" + }, + { + "bbox": [ + 45.97, + 604.04, + 554.34, + 732.59 + ], + "iref": "#/texts/16", + "name": "text", + "orig-order": 23, + "page": 2, + "span": [ + 0, + 1082 + ], + "sref": "#/page-elements/25", + "text-order": 25, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 513.05, + 553.24, + 601.04 + ], + "iref": "#/texts/17", + "name": "text", + "orig-order": 24, + "page": 2, + "span": [ + 0, + 836 + ], + "sref": "#/page-elements/26", + "text-order": 26, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 500.06, + 340.6, + 509.47 + ], + "iref": "#/texts/18", + "name": "text", + "orig-order": 25, + "page": 2, + "span": [ + 0, + 69 + ], + "sref": "#/page-elements/27", + "text-order": 27, + "type": "paragraph" + }, + { + "bbox": [ + 57.86, + 487.08, + 492.16, + 496.64 + ], + "iref": "#/texts/19", + "name": "text", + "orig-order": 26, + "page": 2, + "span": [ + 0, + 101 + ], + "sref": "#/page-elements/28", + "text-order": 28, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 461.06, + 262.57, + 470.57 + ], + "iref": "#/texts/20", + "name": "list-item", + "orig-order": 27, + "page": 2, + "span": [ + 0, + 49 + ], + "sref": "#/page-elements/29", + "text-order": 29, + "type": "paragraph" + }, + { + "bbox": [ + 45.78, + 448.07, + 241.75, + 457.51 + ], + "iref": "#/texts/21", + "name": "list-item", + "orig-order": 28, + "page": 2, + "span": [ + 0, + 45 + ], + "sref": "#/page-elements/30", + "text-order": 30, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 435.03, + 174.96, + 444.55 + ], + "iref": "#/texts/22", + "name": "list-item", + "orig-order": 29, + "page": 2, + "span": [ + 0, + 29 + ], + "sref": "#/page-elements/31", + "text-order": 31, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 422.05, + 528.81, + 431.55 + ], + "iref": "#/texts/23", + "name": "list-item", + "orig-order": 30, + "page": 2, + "span": [ + 0, + 112 + ], + "sref": "#/page-elements/32", + "text-order": 32, + "type": "paragraph" + }, + { + "bbox": [ + 45.39, + 409.07, + 446.48, + 418.9 + ], + "iref": "#/texts/24", + "name": "list-item", + "orig-order": 31, + "page": 2, + "span": [ + 0, + 94 + ], + "sref": "#/page-elements/33", + "text-order": 33, + "type": "paragraph" + }, + { + "bbox": [ + 46.0, + 292.05, + 553.06, + 392.7 + ], + "iref": "#/texts/25", + "name": "text", + "orig-order": 32, + "page": 2, + "span": [ + 0, + 869 + ], + "sref": "#/page-elements/34", + "text-order": 34, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 265.89, + 551.48, + 288.82 + ], + "iref": "#/texts/26", + "name": "text", + "orig-order": 33, + "page": 2, + "span": [ + 0, + 140 + ], + "sref": "#/page-elements/35", + "text-order": 35, + "type": "paragraph" + }, + { + "bbox": [ + 46.37, + 240.06, + 515.49, + 249.53 + ], + "iref": "#/texts/27", + "name": "list-item", + "orig-order": 34, + "page": 2, + "span": [ + 0, + 111 + ], + "sref": "#/page-elements/36", + "text-order": 36, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 214.04, + 551.05, + 236.59 + ], + "iref": "#/texts/28", + "name": "list-item", + "orig-order": 35, + "page": 2, + "span": [ + 0, + 180 + ], + "sref": "#/page-elements/37", + "text-order": 37, + "type": "paragraph" + }, + { + "bbox": [ + 45.2, + 201.06, + 376.77, + 210.76 + ], + "iref": "#/texts/29", + "name": "list-item", + "orig-order": 36, + "page": 2, + "span": [ + 0, + 82 + ], + "sref": "#/page-elements/38", + "text-order": 38, + "type": "paragraph" + }, + { + "bbox": [ + 46.24, + 110.07, + 553.14, + 184.78 + ], + "iref": "#/texts/30", + "name": "text", + "orig-order": 37, + "page": 2, + "span": [ + 0, + 647 + ], + "sref": "#/page-elements/39", + "text-order": 39, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 84.05, + 550.51, + 107.71 + ], + "iref": "#/texts/31", + "name": "text", + "orig-order": 38, + "page": 2, + "span": [ + 0, + 202 + ], + "sref": "#/page-elements/40", + "text-order": 40, + "type": "paragraph" + }, + { + "bbox": [ + 45.98, + 45.05, + 551.84, + 81.25 + ], + "iref": "#/texts/32", + "name": "text", + "orig-order": 39, + "page": 2, + "span": [ + 0, + 346 + ], + "sref": "#/page-elements/41", + "text-order": 41, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/33", + "name": "text", + "orig-order": 42, + "page": 2, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/42", + "text-order": 42, + "type": "paragraph" + }, + { + "bbox": [ + 44.51, + 751.46, + 85.02, + 758.05 + ], + "iref": "#/page-headers/5", + "name": "page-header", + "orig-order": 50, + "page": 3, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/43", + "text-order": 43, + "type": "page-header" + }, + { + "bbox": [ + 528.55, + 751.41, + 550.62, + 758.05 + ], + "iref": "#/texts/34", + "name": "text", + "orig-order": 51, + "page": 3, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/44", + "text-order": 44, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 695.05, + 549.41, + 730.46 + ], + "iref": "#/texts/35", + "name": "text", + "orig-order": 43, + "page": 3, + "span": [ + 0, + 262 + ], + "sref": "#/page-elements/45", + "text-order": 45, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 655.52, + 378.15, + 666.9 + ], + "iref": "#/texts/36", + "name": "subtitle-level-1", + "orig-order": 44, + "page": 3, + "span": [ + 0, + 37 + ], + "sref": "#/page-elements/46", + "text-order": 46, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 552.05, + 549.78, + 639.58 + ], + "iref": "#/texts/37", + "name": "text", + "orig-order": 45, + "page": 3, + "span": [ + 0, + 796 + ], + "sref": "#/page-elements/47", + "text-order": 47, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 409.07, + 554.41, + 548.48 + ], + "iref": "#/texts/38", + "name": "text", + "orig-order": 46, + "page": 3, + "span": [ + 0, + 1141 + ], + "sref": "#/page-elements/48", + "text-order": 48, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 369.5, + 134.89, + 380.89 + ], + "iref": "#/texts/39", + "name": "subtitle-level-1", + "orig-order": 47, + "page": 3, + "span": [ + 0, + 14 + ], + "sref": "#/page-elements/49", + "text-order": 49, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.52, + 317.65, + 552.39, + 353.52 + ], + "iref": "#/texts/40", + "name": "text", + "orig-order": 48, + "page": 3, + "span": [ + 0, + 232 + ], + "sref": "#/page-elements/50", + "text-order": 50, + "type": "paragraph" + }, + { + "bbox": [ + 78.55, + 102.72, + 512.39, + 284.99 + ], + "iref": "#/figures/0", + "name": "picture", + "orig-order": 53, + "page": 3, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/51", + "text-order": 51, + "type": "figure" + }, + { + "bbox": [ + 44.78, + 45.4, + 545.79, + 89.47 + ], + "iref": "#/figures/0/captions/0", + "name": "caption", + "orig-order": 49, + "page": 3, + "span": [ + 0, + 498 + ], + "sref": "#/page-elements/52", + "text-order": 52, + "type": "caption" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/41", + "name": "text", + "orig-order": 52, + "page": 3, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/53", + "text-order": 53, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 751.41, + 68.56, + 758.05 + ], + "iref": "#/texts/42", + "name": "text", + "orig-order": 63, + "page": 4, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/54", + "text-order": 54, + "type": "paragraph" + }, + { + "bbox": [ + 510.63, + 751.46, + 550.94, + 758.49 + ], + "iref": "#/page-headers/6", + "name": "page-header", + "orig-order": 64, + "page": 4, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/55", + "text-order": 55, + "type": "page-header" + }, + { + "bbox": [ + 45.14, + 720.49, + 157.76, + 732.34 + ], + "iref": "#/texts/43", + "name": "subtitle-level-1", + "orig-order": 54, + "page": 4, + "span": [ + 0, + 18 + ], + "sref": "#/page-elements/56", + "text-order": 56, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.49, + 656.08, + 553.55, + 704.77 + ], + "iref": "#/texts/44", + "name": "text", + "orig-order": 55, + "page": 4, + "span": [ + 0, + 403 + ], + "sref": "#/page-elements/57", + "text-order": 57, + "type": "paragraph" + }, + { + "bbox": [ + 45.56, + 604.04, + 553.09, + 652.89 + ], + "iref": "#/texts/45", + "name": "text", + "orig-order": 56, + "page": 4, + "span": [ + 0, + 417 + ], + "sref": "#/page-elements/58", + "text-order": 58, + "type": "paragraph" + }, + { + "bbox": [ + 45.66, + 565.09, + 552.86, + 600.94 + ], + "iref": "#/texts/46", + "name": "text", + "orig-order": 57, + "page": 4, + "span": [ + 0, + 282 + ], + "sref": "#/page-elements/59", + "text-order": 59, + "type": "paragraph" + }, + { + "bbox": [ + 45.5, + 525.52, + 161.91, + 536.91 + ], + "iref": "#/texts/47", + "name": "subtitle-level-1", + "orig-order": 58, + "page": 4, + "span": [ + 0, + 18 + ], + "sref": "#/page-elements/60", + "text-order": 60, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.28, + 435.03, + 552.78, + 509.81 + ], + "iref": "#/texts/48", + "name": "text", + "orig-order": 59, + "page": 4, + "span": [ + 0, + 647 + ], + "sref": "#/page-elements/61", + "text-order": 61, + "type": "paragraph" + }, + { + "bbox": [ + 46.0, + 370.07, + 551.75, + 431.6 + ], + "iref": "#/texts/49", + "name": "text", + "orig-order": 60, + "page": 4, + "span": [ + 0, + 542 + ], + "sref": "#/page-elements/62", + "text-order": 62, + "type": "paragraph" + }, + { + "bbox": [ + 46.38, + 304.92, + 551.43, + 366.63 + ], + "iref": "#/texts/50", + "name": "text", + "orig-order": 61, + "page": 4, + "span": [ + 0, + 580 + ], + "sref": "#/page-elements/63", + "text-order": 63, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 45.4, + 540.32, + 67.21 + ], + "iref": "#/texts/51", + "name": "text", + "orig-order": 62, + "page": 4, + "span": [ + 0, + 220 + ], + "sref": "#/page-elements/64", + "text-order": 64, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/52", + "name": "text", + "orig-order": 65, + "page": 4, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/65", + "text-order": 65, + "type": "paragraph" + }, + { + "bbox": [ + 44.04, + 751.31, + 85.72, + 759.73 + ], + "iref": "#/page-headers/7", + "name": "page-header", + "orig-order": 72, + "page": 5, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/66", + "text-order": 66, + "type": "page-header" + }, + { + "bbox": [ + 454.14, + 745.72, + 550.62, + 761.01 + ], + "iref": "#/figures/1", + "name": "picture", + "orig-order": 73, + "page": 5, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/67", + "text-order": 67, + "type": "figure" + }, + { + "bbox": [ + 44.79, + 483.4, + 548.26, + 529.32 + ], + "iref": "#/texts/53", + "name": "text", + "orig-order": 71, + "page": 5, + "span": [ + 0, + 421 + ], + "sref": "#/page-elements/68", + "text-order": 68, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 370.06, + 549.87, + 444.57 + ], + "iref": "#/texts/54", + "name": "text", + "orig-order": 66, + "page": 5, + "span": [ + 0, + 687 + ], + "sref": "#/page-elements/69", + "text-order": 69, + "type": "paragraph" + }, + { + "bbox": [ + 44.21, + 330.49, + 223.93, + 341.88 + ], + "iref": "#/texts/55", + "name": "subtitle-level-1", + "orig-order": 67, + "page": 5, + "span": [ + 0, + 31 + ], + "sref": "#/page-elements/70", + "text-order": 70, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 149.07, + 549.82, + 314.54 + ], + "iref": "#/texts/56", + "name": "text", + "orig-order": 68, + "page": 5, + "span": [ + 0, + 1517 + ], + "sref": "#/page-elements/71", + "text-order": 71, + "type": "paragraph" + }, + { + "bbox": [ + 43.95, + 109.51, + 254.48, + 120.89 + ], + "iref": "#/texts/57", + "name": "subtitle-level-1", + "orig-order": 69, + "page": 5, + "span": [ + 0, + 36 + ], + "sref": "#/page-elements/72", + "text-order": 72, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 45.01, + 549.14, + 93.61 + ], + "iref": "#/texts/58", + "name": "text", + "orig-order": 70, + "page": 5, + "span": [ + 0, + 384 + ], + "sref": "#/page-elements/73", + "text-order": 73, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/59", + "name": "text", + "orig-order": 74, + "page": 5, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/74", + "text-order": 74, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 751.41, + 68.56, + 758.05 + ], + "iref": "#/texts/60", + "name": "text", + "orig-order": 89, + "page": 6, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/75", + "text-order": 75, + "type": "paragraph" + }, + { + "bbox": [ + 510.63, + 751.46, + 550.99, + 758.98 + ], + "iref": "#/page-headers/8", + "name": "page-header", + "orig-order": 90, + "page": 6, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/76", + "text-order": 76, + "type": "page-header" + }, + { + "bbox": [ + 45.78, + 669.06, + 554.4, + 730.82 + ], + "iref": "#/texts/61", + "name": "text", + "orig-order": 75, + "page": 6, + "span": [ + 0, + 564 + ], + "sref": "#/page-elements/77", + "text-order": 77, + "type": "paragraph" + }, + { + "bbox": [ + 45.75, + 629.49, + 148.0, + 641.57 + ], + "iref": "#/texts/62", + "name": "subtitle-level-1", + "orig-order": 76, + "page": 6, + "span": [ + 0, + 16 + ], + "sref": "#/page-elements/78", + "text-order": 78, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.49, + 591.05, + 552.9, + 613.81 + ], + "iref": "#/texts/63", + "name": "text", + "orig-order": 77, + "page": 6, + "span": [ + 0, + 225 + ], + "sref": "#/page-elements/79", + "text-order": 79, + "type": "paragraph" + }, + { + "bbox": [ + 46.45, + 552.05, + 553.36, + 575.29 + ], + "iref": "#/texts/64", + "name": "list-item", + "orig-order": 78, + "page": 6, + "span": [ + 0, + 179 + ], + "sref": "#/page-elements/80", + "text-order": 80, + "type": "paragraph" + }, + { + "bbox": [ + 45.74, + 526.08, + 553.54, + 548.9 + ], + "iref": "#/texts/65", + "name": "list-item", + "orig-order": 79, + "page": 6, + "span": [ + 0, + 133 + ], + "sref": "#/page-elements/81", + "text-order": 81, + "type": "paragraph" + }, + { + "bbox": [ + 44.88, + 513.04, + 481.36, + 523.51 + ], + "iref": "#/texts/66", + "name": "list-item", + "orig-order": 80, + "page": 6, + "span": [ + 0, + 101 + ], + "sref": "#/page-elements/82", + "text-order": 82, + "type": "paragraph" + }, + { + "bbox": [ + 46.39, + 435.03, + 553.39, + 497.02 + ], + "iref": "#/texts/67", + "name": "text", + "orig-order": 81, + "page": 6, + "span": [ + 0, + 525 + ], + "sref": "#/page-elements/83", + "text-order": 83, + "type": "paragraph" + }, + { + "bbox": [ + 45.55, + 344.04, + 555.01, + 432.12 + ], + "iref": "#/texts/68", + "name": "text", + "orig-order": 82, + "page": 6, + "span": [ + 0, + 693 + ], + "sref": "#/page-elements/84", + "text-order": 84, + "type": "paragraph" + }, + { + "bbox": [ + 46.26, + 304.47, + 469.55, + 315.86 + ], + "iref": "#/texts/69", + "name": "subtitle-level-1", + "orig-order": 83, + "page": 6, + "span": [ + 0, + 48 + ], + "sref": "#/page-elements/85", + "text-order": 85, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.49, + 265.93, + 552.64, + 288.61 + ], + "iref": "#/texts/70", + "name": "text", + "orig-order": 84, + "page": 6, + "span": [ + 0, + 166 + ], + "sref": "#/page-elements/86", + "text-order": 86, + "type": "paragraph" + }, + { + "bbox": [ + 46.38, + 240.05, + 429.52, + 249.76 + ], + "iref": "#/texts/71", + "name": "list-item", + "orig-order": 85, + "page": 6, + "span": [ + 0, + 92 + ], + "sref": "#/page-elements/87", + "text-order": 87, + "type": "paragraph" + }, + { + "bbox": [ + 45.62, + 227.09, + 346.36, + 237.67 + ], + "iref": "#/texts/72", + "name": "list-item", + "orig-order": 86, + "page": 6, + "span": [ + 0, + 73 + ], + "sref": "#/page-elements/88", + "text-order": 88, + "type": "paragraph" + }, + { + "bbox": [ + 45.32, + 162.06, + 553.89, + 210.65 + ], + "iref": "#/texts/73", + "name": "text", + "orig-order": 87, + "page": 6, + "span": [ + 0, + 472 + ], + "sref": "#/page-elements/89", + "text-order": 89, + "type": "paragraph" + }, + { + "bbox": [ + 45.76, + 71.07, + 554.23, + 158.8 + ], + "iref": "#/texts/74", + "name": "text", + "orig-order": 88, + "page": 6, + "span": [ + 0, + 761 + ], + "sref": "#/page-elements/90", + "text-order": 90, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/75", + "name": "text", + "orig-order": 91, + "page": 6, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/91", + "text-order": 91, + "type": "paragraph" + }, + { + "bbox": [ + 44.35, + 751.46, + 85.42, + 758.93 + ], + "iref": "#/page-headers/9", + "name": "page-header", + "orig-order": 103, + "page": 7, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/92", + "text-order": 92, + "type": "page-header" + }, + { + "bbox": [ + 528.55, + 751.41, + 550.62, + 758.05 + ], + "iref": "#/texts/76", + "name": "text", + "orig-order": 104, + "page": 7, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/93", + "text-order": 93, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 695.09, + 549.55, + 730.67 + ], + "iref": "#/texts/77", + "name": "text", + "orig-order": 92, + "page": 7, + "span": [ + 0, + 324 + ], + "sref": "#/page-elements/94", + "text-order": 94, + "type": "paragraph" + }, + { + "bbox": [ + 44.72, + 655.52, + 236.79, + 666.9 + ], + "iref": "#/texts/78", + "name": "subtitle-level-1", + "orig-order": 93, + "page": 7, + "span": [ + 0, + 32 + ], + "sref": "#/page-elements/95", + "text-order": 95, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 578.07, + 549.25, + 640.17 + ], + "iref": "#/texts/79", + "name": "text", + "orig-order": 94, + "page": 7, + "span": [ + 0, + 502 + ], + "sref": "#/page-elements/96", + "text-order": 96, + "type": "paragraph" + }, + { + "bbox": [ + 44.73, + 539.07, + 548.86, + 576.57 + ], + "iref": "#/texts/80", + "name": "text", + "orig-order": 95, + "page": 7, + "span": [ + 0, + 324 + ], + "sref": "#/page-elements/97", + "text-order": 97, + "type": "paragraph" + }, + { + "bbox": [ + 214.75, + 498.59, + 548.78, + 529.37 + ], + "iref": "#/texts/81", + "name": "formula", + "orig-order": 96, + "page": 7, + "span": [ + 0, + 92 + ], + "sref": "#/page-elements/98", + "text-order": 98, + "type": "equation" + }, + { + "bbox": [ + 44.78, + 435.04, + 548.75, + 470.53 + ], + "iref": "#/texts/82", + "name": "text", + "orig-order": 97, + "page": 7, + "span": [ + 0, + 327 + ], + "sref": "#/page-elements/99", + "text-order": 99, + "type": "paragraph" + }, + { + "bbox": [ + 234.89, + 399.49, + 549.15, + 425.9 + ], + "iref": "#/texts/83", + "name": "formula", + "orig-order": 98, + "page": 7, + "span": [ + 0, + 114 + ], + "sref": "#/page-elements/100", + "text-order": 100, + "type": "equation" + }, + { + "bbox": [ + 44.79, + 279.07, + 549.01, + 379.83 + ], + "iref": "#/texts/84", + "name": "text", + "orig-order": 99, + "page": 7, + "span": [ + 0, + 960 + ], + "sref": "#/page-elements/101", + "text-order": 101, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 253.05, + 549.3, + 275.76 + ], + "iref": "#/texts/85", + "name": "text", + "orig-order": 100, + "page": 7, + "span": [ + 0, + 204 + ], + "sref": "#/page-elements/102", + "text-order": 102, + "type": "paragraph" + }, + { + "bbox": [ + 43.78, + 213.48, + 380.19, + 224.87 + ], + "iref": "#/texts/86", + "name": "subtitle-level-1", + "orig-order": 101, + "page": 7, + "span": [ + 0, + 54 + ], + "sref": "#/page-elements/103", + "text-order": 103, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 58.08, + 550.32, + 197.49 + ], + "iref": "#/texts/87", + "name": "text", + "orig-order": 102, + "page": 7, + "span": [ + 0, + 1216 + ], + "sref": "#/page-elements/104", + "text-order": 104, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/88", + "name": "text", + "orig-order": 105, + "page": 7, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/105", + "text-order": 105, + "type": "paragraph" + }, + { + "bbox": [ + 45.74, + 751.41, + 68.56, + 758.99 + ], + "iref": "#/page-headers/10", + "name": "page-header", + "orig-order": 113, + "page": 8, + "span": [ + 0, + 6 + ], + "sref": "#/page-elements/106", + "text-order": 106, + "type": "page-header" + }, + { + "bbox": [ + 510.63, + 751.46, + 550.92, + 758.39 + ], + "iref": "#/page-headers/11", + "name": "page-header", + "orig-order": 114, + "page": 8, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/107", + "text-order": 107, + "type": "page-header" + }, + { + "bbox": [ + 96.35, + 537.81, + 496.87, + 731.78 + ], + "iref": "#/figures/2", + "name": "picture", + "orig-order": 116, + "page": 8, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/108", + "text-order": 108, + "type": "figure" + }, + { + "bbox": [ + 46.0, + 491.8, + 543.2, + 523.78 + ], + "iref": "#/figures/2/captions/0", + "name": "caption", + "orig-order": 112, + "page": 8, + "span": [ + 0, + 268 + ], + "sref": "#/page-elements/109", + "text-order": 109, + "type": "caption" + }, + { + "bbox": [ + 46.49, + 370.06, + 551.98, + 457.64 + ], + "iref": "#/texts/89", + "name": "text", + "orig-order": 106, + "page": 8, + "span": [ + 0, + 745 + ], + "sref": "#/page-elements/110", + "text-order": 110, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 239.97, + 551.49, + 366.49 + ], + "iref": "#/texts/90", + "name": "text", + "orig-order": 107, + "page": 8, + "span": [ + 0, + 1027 + ], + "sref": "#/page-elements/111", + "text-order": 111, + "type": "paragraph" + }, + { + "bbox": [ + 45.14, + 200.5, + 333.74, + 211.89 + ], + "iref": "#/texts/91", + "name": "subtitle-level-1", + "orig-order": 108, + "page": 8, + "span": [ + 0, + 48 + ], + "sref": "#/page-elements/112", + "text-order": 112, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 45.91, + 162.06, + 551.37, + 184.45 + ], + "iref": "#/texts/92", + "name": "text", + "orig-order": 109, + "page": 8, + "span": [ + 0, + 179 + ], + "sref": "#/page-elements/113", + "text-order": 113, + "type": "paragraph" + }, + { + "bbox": [ + 46.22, + 84.05, + 550.91, + 158.49 + ], + "iref": "#/texts/93", + "name": "text", + "orig-order": 110, + "page": 8, + "span": [ + 0, + 643 + ], + "sref": "#/page-elements/114", + "text-order": 114, + "type": "paragraph" + }, + { + "bbox": [ + 44.99, + 45.02, + 552.19, + 80.53 + ], + "iref": "#/texts/94", + "name": "text", + "orig-order": 111, + "page": 8, + "span": [ + 0, + 262 + ], + "sref": "#/page-elements/115", + "text-order": 115, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/95", + "name": "text", + "orig-order": 115, + "page": 8, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/116", + "text-order": 116, + "type": "paragraph" + }, + { + "bbox": [ + 44.35, + 751.46, + 84.67, + 758.05 + ], + "iref": "#/page-headers/12", + "name": "page-header", + "orig-order": 126, + "page": 9, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/117", + "text-order": 117, + "type": "page-header" + }, + { + "bbox": [ + 528.55, + 751.41, + 550.62, + 758.05 + ], + "iref": "#/texts/96", + "name": "text", + "orig-order": 127, + "page": 9, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/118", + "text-order": 118, + "type": "paragraph" + }, + { + "bbox": [ + 116.26, + 507.84, + 473.64, + 731.27 + ], + "iref": "#/figures/3", + "name": "picture", + "orig-order": 129, + "page": 9, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/119", + "text-order": 119, + "type": "figure" + }, + { + "bbox": [ + 44.79, + 447.43, + 541.61, + 491.69 + ], + "iref": "#/figures/3/captions/0", + "name": "caption", + "orig-order": 125, + "page": 9, + "span": [ + 0, + 473 + ], + "sref": "#/page-elements/120", + "text-order": 120, + "type": "caption" + }, + { + "bbox": [ + 44.42, + 395.52, + 176.33, + 406.91 + ], + "iref": "#/texts/97", + "name": "subtitle-level-1", + "orig-order": 117, + "page": 9, + "span": [ + 0, + 22 + ], + "sref": "#/page-elements/121", + "text-order": 121, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.79, + 343.81, + 548.77, + 379.57 + ], + "iref": "#/texts/98", + "name": "text", + "orig-order": 118, + "page": 9, + "span": [ + 0, + 270 + ], + "sref": "#/page-elements/122", + "text-order": 122, + "type": "paragraph" + }, + { + "bbox": [ + 245.62, + 303.56, + 549.35, + 334.34 + ], + "iref": "#/texts/99", + "name": "formula", + "orig-order": 119, + "page": 9, + "span": [ + 0, + 72 + ], + "sref": "#/page-elements/123", + "text-order": 123, + "type": "equation" + }, + { + "bbox": [ + 44.27, + 266.09, + 323.55, + 275.53 + ], + "iref": "#/texts/100", + "name": "text", + "orig-order": 120, + "page": 9, + "span": [ + 0, + 69 + ], + "sref": "#/page-elements/124", + "text-order": 124, + "type": "paragraph" + }, + { + "bbox": [ + 44.09, + 226.52, + 183.25, + 237.91 + ], + "iref": "#/texts/101", + "name": "subtitle-level-1", + "orig-order": 121, + "page": 9, + "span": [ + 0, + 23 + ], + "sref": "#/page-elements/125", + "text-order": 125, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 44.13, + 149.08, + 549.16, + 210.87 + ], + "iref": "#/texts/102", + "name": "text", + "orig-order": 122, + "page": 9, + "span": [ + 0, + 580 + ], + "sref": "#/page-elements/126", + "text-order": 126, + "type": "paragraph" + }, + { + "bbox": [ + 213.45, + 108.0, + 548.78, + 139.26 + ], + "iref": "#/texts/103", + "name": "formula", + "orig-order": 123, + "page": 9, + "span": [ + 0, + 147 + ], + "sref": "#/page-elements/127", + "text-order": 127, + "type": "equation" + }, + { + "bbox": [ + 44.79, + 45.05, + 548.8, + 80.76 + ], + "iref": "#/texts/104", + "name": "text", + "orig-order": 124, + "page": 9, + "span": [ + 0, + 307 + ], + "sref": "#/page-elements/128", + "text-order": 128, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/105", + "name": "text", + "orig-order": 128, + "page": 9, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/129", + "text-order": 129, + "type": "paragraph" + }, + { + "bbox": [ + 45.89, + 743.98, + 143.19, + 761.31 + ], + "iref": "#/figures/4", + "name": "picture", + "orig-order": 142, + "page": 10, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/130", + "text-order": 130, + "type": "figure" + }, + { + "bbox": [ + 510.63, + 751.46, + 550.89, + 758.54 + ], + "iref": "#/page-headers/13", + "name": "page-header", + "orig-order": 143, + "page": 10, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/131", + "text-order": 131, + "type": "page-header" + }, + { + "bbox": [ + 44.98, + 720.48, + 201.3, + 732.0 + ], + "iref": "#/texts/106", + "name": "subtitle-level-1", + "orig-order": 130, + "page": 10, + "span": [ + 0, + 26 + ], + "sref": "#/page-elements/132", + "text-order": 132, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.1, + 656.08, + 554.12, + 705.22 + ], + "iref": "#/texts/107", + "name": "text", + "orig-order": 131, + "page": 10, + "span": [ + 0, + 390 + ], + "sref": "#/page-elements/133", + "text-order": 133, + "type": "paragraph" + }, + { + "bbox": [ + 45.49, + 616.51, + 214.94, + 627.93 + ], + "iref": "#/texts/108", + "name": "subtitle-level-1", + "orig-order": 132, + "page": 10, + "span": [ + 0, + 27 + ], + "sref": "#/page-elements/134", + "text-order": 134, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 45.36, + 578.07, + 552.45, + 600.56 + ], + "iref": "#/texts/109", + "name": "text", + "orig-order": 133, + "page": 10, + "span": [ + 0, + 172 + ], + "sref": "#/page-elements/135", + "text-order": 135, + "type": "paragraph" + }, + { + "bbox": [ + 46.01, + 500.06, + 551.9, + 574.5 + ], + "iref": "#/texts/110", + "name": "text", + "orig-order": 134, + "page": 10, + "span": [ + 0, + 691 + ], + "sref": "#/page-elements/136", + "text-order": 136, + "type": "paragraph" + }, + { + "bbox": [ + 45.8, + 448.07, + 552.13, + 496.56 + ], + "iref": "#/texts/111", + "name": "text", + "orig-order": 135, + "page": 10, + "span": [ + 0, + 420 + ], + "sref": "#/page-elements/137", + "text-order": 137, + "type": "paragraph" + }, + { + "bbox": [ + 46.02, + 408.5, + 321.51, + 419.89 + ], + "iref": "#/texts/112", + "name": "subtitle-level-1", + "orig-order": 136, + "page": 10, + "span": [ + 0, + 31 + ], + "sref": "#/page-elements/138", + "text-order": 138, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.3, + 357.08, + 550.61, + 392.46 + ], + "iref": "#/texts/113", + "name": "text", + "orig-order": 137, + "page": 10, + "span": [ + 0, + 334 + ], + "sref": "#/page-elements/139", + "text-order": 139, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 253.05, + 551.04, + 353.45 + ], + "iref": "#/texts/114", + "name": "text", + "orig-order": 138, + "page": 10, + "span": [ + 0, + 847 + ], + "sref": "#/page-elements/140", + "text-order": 140, + "type": "paragraph" + }, + { + "bbox": [ + 46.44, + 188.08, + 551.4, + 249.48 + ], + "iref": "#/texts/115", + "name": "text", + "orig-order": 139, + "page": 10, + "span": [ + 0, + 477 + ], + "sref": "#/page-elements/141", + "text-order": 141, + "type": "paragraph" + }, + { + "bbox": [ + 46.28, + 136.04, + 550.96, + 184.45 + ], + "iref": "#/texts/116", + "name": "text", + "orig-order": 140, + "page": 10, + "span": [ + 0, + 404 + ], + "sref": "#/page-elements/142", + "text-order": 142, + "type": "paragraph" + }, + { + "bbox": [ + 46.42, + 58.08, + 551.04, + 132.46 + ], + "iref": "#/texts/117", + "name": "text", + "orig-order": 141, + "page": 10, + "span": [ + 0, + 572 + ], + "sref": "#/page-elements/143", + "text-order": 143, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/118", + "name": "text", + "orig-order": 144, + "page": 10, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/144", + "text-order": 144, + "type": "paragraph" + }, + { + "bbox": [ + 43.99, + 751.46, + 84.67, + 758.05 + ], + "iref": "#/page-headers/14", + "name": "page-header", + "orig-order": 150, + "page": 11, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/145", + "text-order": 145, + "type": "page-header" + }, + { + "bbox": [ + 525.15, + 751.41, + 548.78, + 758.05 + ], + "iref": "#/texts/119", + "name": "text", + "orig-order": 151, + "page": 11, + "span": [ + 0, + 6 + ], + "sref": "#/page-elements/146", + "text-order": 146, + "type": "paragraph" + }, + { + "bbox": [ + 48.37, + 477.84, + 548.36, + 732.33 + ], + "iref": "#/figures/5", + "name": "picture", + "orig-order": 153, + "page": 11, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/147", + "text-order": 147, + "type": "figure" + }, + { + "bbox": [ + 44.79, + 428.34, + 541.05, + 460.56 + ], + "iref": "#/figures/5/captions/0", + "name": "caption", + "orig-order": 149, + "page": 11, + "span": [ + 0, + 275 ], - "sref": "#/page-elements/3", - "text-order": 3, - "type": "page-header" + "sref": "#/page-elements/148", + "text-order": 148, + "type": "caption" }, { "bbox": [ - 43.95979690551758, - 702.3956298828125, - 91.94560241699219, - 712.1011962890625 + 44.79, + 331.06, + 550.65, + 405.5 ], - "iref": "#/texts/0", - "name": "subtitle-level-1", - "orig-order": 0, - "page": 1, + "iref": "#/texts/120", + "name": "text", + "orig-order": 145, + "page": 11, "span": [ 0, - 6 + 596 ], - "sref": "#/page-elements/4", - "text-order": 4, - "type": "subtitle-level-1" + "sref": "#/page-elements/149", + "text-order": 149, + "type": "paragraph" }, { "bbox": [ - 44.709346771240234, - 631.2674560546875, - 520.7667236328125, - 672.0067749023438 + 44.49, + 291.49, + 365.99, + 302.88 ], - "iref": "#/texts/1", + "iref": "#/texts/121", "name": "subtitle-level-1", - "orig-order": 1, - "page": 1, + "orig-order": 146, + "page": 11, "span": [ 0, - 97 + 39 ], - "sref": "#/page-elements/5", - "text-order": 5, + "sref": "#/page-elements/150", + "text-order": 150, "type": "subtitle-level-1" }, { "bbox": [ - 44.78739929199219, - 593.6065673828125, - 146.4720458984375, - 606.4735717773438 + 44.79, + 175.04, + 549.79, + 275.5 ], - "iref": "#/texts/2", + "iref": "#/texts/122", + "name": "text", + "orig-order": 147, + "page": 11, + "span": [ + 0, + 861 + ], + "sref": "#/page-elements/151", + "text-order": 151, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 45.04, + 549.44, + 171.59 + ], + "iref": "#/texts/123", + "name": "text", + "orig-order": 148, + "page": 11, + "span": [ + 0, + 1189 + ], + "sref": "#/page-elements/152", + "text-order": 152, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/124", + "name": "text", + "orig-order": 152, + "page": 11, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/153", + "text-order": 153, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 751.41, + 51.25, + 758.05 + ], + "iref": "#/texts/125", + "name": "text", + "orig-order": 166, + "page": 12, + "span": [ + 0, + 2 + ], + "sref": "#/page-elements/154", + "text-order": 154, + "type": "paragraph" + }, + { + "bbox": [ + 56.12, + 751.41, + 70.12, + 758.05 + ], + "iref": "#/texts/126", + "name": "text", + "orig-order": 167, + "page": 12, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/155", + "text-order": 155, + "type": "paragraph" + }, + { + "bbox": [ + 510.63, + 751.46, + 550.74, + 758.25 + ], + "iref": "#/page-headers/15", + "name": "page-header", + "orig-order": 168, + "page": 12, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/156", + "text-order": 156, + "type": "page-header" + }, + { + "bbox": [ + 55.88, + 606.85, + 541.85, + 729.68 + ], + "iref": "#/figures/6", + "name": "picture", + "orig-order": 164, + "page": 12, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/157", + "text-order": 157, + "type": "figure" + }, + { + "bbox": [ + 44.77, + 585.46, + 387.12, + 593.59 + ], + "iref": "#/figures/6/captions/0", + "name": "caption", + "orig-order": 165, + "page": 12, + "span": [ + 0, + 88 + ], + "sref": "#/page-elements/158", + "text-order": 158, + "type": "caption" + }, + { + "bbox": [ + 45.36, + 526.08, + 552.56, + 548.48 + ], + "iref": "#/texts/127", + "name": "text", + "orig-order": 154, + "page": 12, + "span": [ + 0, + 171 + ], + "sref": "#/page-elements/159", + "text-order": 159, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 448.07, + 552.17, + 522.45 + ], + "iref": "#/texts/128", + "name": "text", + "orig-order": 155, + "page": 12, + "span": [ + 0, + 596 + ], + "sref": "#/page-elements/160", + "text-order": 160, + "type": "paragraph" + }, + { + "bbox": [ + 46.23, + 382.82, + 552.13, + 444.6 + ], + "iref": "#/texts/129", + "name": "text", + "orig-order": 156, + "page": 12, + "span": [ + 0, + 460 + ], + "sref": "#/page-elements/161", + "text-order": 161, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 357.08, + 309.65, + 366.49 + ], + "iref": "#/texts/130", + "name": "list-item", + "orig-order": 157, + "page": 12, + "span": [ + 0, + 57 + ], + "sref": "#/page-elements/162", + "text-order": 162, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 344.04, + 336.83, + 353.64 + ], + "iref": "#/texts/131", + "name": "list-item", + "orig-order": 158, + "page": 12, + "span": [ + 0, + 65 + ], + "sref": "#/page-elements/163", + "text-order": 163, + "type": "paragraph" + }, + { + "bbox": [ + 45.47, + 331.06, + 478.31, + 340.55 + ], + "iref": "#/texts/132", + "name": "list-item", + "orig-order": 159, + "page": 12, + "span": [ + 0, + 101 + ], + "sref": "#/page-elements/164", + "text-order": 164, + "type": "paragraph" + }, + { + "bbox": [ + 46.17, + 214.05, + 551.78, + 314.45 + ], + "iref": "#/texts/133", + "name": "text", + "orig-order": 160, + "page": 12, + "span": [ + 0, + 923 + ], + "sref": "#/page-elements/165", + "text-order": 165, + "type": "paragraph" + }, + { + "bbox": [ + 46.26, + 149.08, + 551.37, + 210.69 + ], + "iref": "#/texts/134", + "name": "text", + "orig-order": 161, + "page": 12, + "span": [ + 0, + 569 + ], + "sref": "#/page-elements/166", + "text-order": 166, + "type": "paragraph" + }, + { + "bbox": [ + 45.71, + 71.07, + 551.88, + 145.51 + ], + "iref": "#/texts/135", + "name": "text", + "orig-order": 162, + "page": 12, + "span": [ + 0, + 698 + ], + "sref": "#/page-elements/167", + "text-order": 167, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 45.04, + 551.84, + 67.67 + ], + "iref": "#/texts/136", + "name": "text", + "orig-order": 163, + "page": 12, + "span": [ + 0, + 218 + ], + "sref": "#/page-elements/168", + "text-order": 168, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/137", + "name": "text", + "orig-order": 169, + "page": 12, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/169", + "text-order": 169, + "type": "paragraph" + }, + { + "bbox": [ + 44.32, + 751.46, + 84.67, + 758.05 + ], + "iref": "#/page-headers/16", + "name": "page-header", + "orig-order": 177, + "page": 13, + "span": [ + 0, + 13 + ], + "sref": "#/page-elements/170", + "text-order": 170, + "type": "page-header" + }, + { + "bbox": [ + 525.15, + 751.41, + 529.91, + 758.05 + ], + "iref": "#/texts/138", + "name": "text", + "orig-order": 178, + "page": 13, + "span": [ + 0, + 2 + ], + "sref": "#/page-elements/171", + "text-order": 171, + "type": "paragraph" + }, + { + "bbox": [ + 534.78, + 751.41, + 548.78, + 758.05 + ], + "iref": "#/texts/139", + "name": "text", + "orig-order": 179, + "page": 13, + "span": [ + 0, + 5 + ], + "sref": "#/page-elements/172", + "text-order": 172, + "type": "paragraph" + }, + { + "bbox": [ + 45.16, + 607.38, + 548.95, + 731.49 + ], + "iref": "#/figures/7", + "name": "picture", + "orig-order": 181, + "page": 13, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/173", + "text-order": 173, + "type": "figure" + }, + { + "bbox": [ + 44.35, + 537.04, + 539.26, + 593.74 + ], + "iref": "#/figures/7/captions/0", + "name": "text", + "orig-order": 174, + "page": 13, + "span": [ + 0, + 608 + ], + "sref": "#/page-elements/174", + "text-order": 174, + "type": "paragraph" + }, + { + "bbox": [ + 44.49, + 441.91, + 181.12, + 498.28 + ], + "iref": "#/tables/0/captions/0", + "name": "caption", + "orig-order": 175, + "page": 13, + "span": [ + 0, + 160 + ], + "sref": "#/page-elements/175", + "text-order": 175, + "type": "caption" + }, + { + "bbox": [ + 210.0, + 346.58, + 549.02, + 499.13 + ], + "iref": "#/tables/0", + "name": "table", + "orig-order": 176, + "page": 13, + "span": [ + 0, + 0 + ], + "sref": "#/page-elements/176", + "text-order": 176, + "type": "table" + }, + { + "bbox": [ + 44.79, + 292.06, + 549.02, + 314.45 + ], + "iref": "#/texts/140", + "name": "text", + "orig-order": 170, + "page": 13, + "span": [ + 0, + 191 + ], + "sref": "#/page-elements/177", + "text-order": 177, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 188.08, + 550.87, + 288.53 + ], + "iref": "#/texts/141", + "name": "text", + "orig-order": 171, + "page": 13, + "span": [ + 0, + 834 + ], + "sref": "#/page-elements/178", + "text-order": 178, + "type": "paragraph" + }, + { + "bbox": [ + 44.74, + 148.51, + 178.23, + 159.9 + ], + "iref": "#/texts/142", "name": "subtitle-level-1", - "orig-order": 2, - "page": 1, + "orig-order": 172, + "page": 13, "span": [ 0, - 17 + 15 ], - "sref": "#/page-elements/6", - "text-order": 6, + "sref": "#/page-elements/179", + "text-order": 179, "type": "subtitle-level-1" }, { "bbox": [ - 160.10069274902344, - 593.7201538085938, - 163.59266662597656, - 605.1080322265625 + 44.79, + 58.08, + 549.52, + 132.55 ], - "iref": "#/texts/3", + "iref": "#/texts/143", "name": "text", - "orig-order": 3, - "page": 1, + "orig-order": 173, + "page": 13, + "span": [ + 0, + 699 + ], + "sref": "#/page-elements/180", + "text-order": 180, + "type": "paragraph" + }, + { + "bbox": [ + 578.37, + 15.45, + 583.48, + 766.71 + ], + "iref": "#/texts/144", + "name": "text", + "orig-order": 180, + "page": 13, + "span": [ + 0, + 320 + ], + "sref": "#/page-elements/181", + "text-order": 181, + "type": "paragraph" + }, + { + "bbox": [ + 46.49, + 751.41, + 70.12, + 758.05 + ], + "iref": "#/texts/145", + "name": "text", + "orig-order": 213, + "page": 14, "span": [ 0, - 1 + 6 ], - "sref": "#/page-elements/7", - "text-order": 7, + "sref": "#/page-elements/182", + "text-order": 182, "type": "paragraph" }, { "bbox": [ - 170.39439392089844, - 593.4388427734375, - 265.1170959472656, - 607.2059326171875 + 510.63, + 751.39, + 551.09, + 759.21 ], - "iref": "#/texts/4", - "name": "subtitle-level-1", - "orig-order": 4, - "page": 1, + "iref": "#/page-headers/17", + "name": "page-header", + "orig-order": 214, + "page": 14, "span": [ 0, 13 ], - "sref": "#/page-elements/8", - "text-order": 8, - "type": "subtitle-level-1" + "sref": "#/page-elements/183", + "text-order": 183, + "type": "page-header" }, { "bbox": [ - 274.5636901855469, - 593.7201538085938, - 278.0556640625, - 605.1080322265625 + 46.39, + 708.07, + 552.19, + 731.09 ], - "iref": "#/texts/5", + "iref": "#/texts/146", "name": "text", - "orig-order": 5, - "page": 1, + "orig-order": 182, + "page": 14, "span": [ 0, - 1 + 119 ], - "sref": "#/page-elements/9", - "text-order": 9, + "sref": "#/page-elements/184", + "text-order": 184, "type": "paragraph" }, { "bbox": [ - 290.0411682128906, - 593.2594604492188, - 387.6253967285156, - 606.9615478515625 + 45.29, + 669.06, + 553.28, + 705.68 ], - "iref": "#/texts/6", + "iref": "#/texts/147", "name": "text", - "orig-order": 6, - "page": 1, + "orig-order": 183, + "page": 14, "span": [ 0, - 14 + 322 ], - "sref": "#/page-elements/10", - "text-order": 10, + "sref": "#/page-elements/185", + "text-order": 185, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 559.602294921875, - 182.68014526367188, - 567.3045654296875 + 44.97, + 643.04, + 553.87, + 666.64 ], - "iref": "#/texts/7", + "iref": "#/texts/148", "name": "text", - "orig-order": 7, - "page": 1, + "orig-order": 184, + "page": 14, "span": [ 0, - 38 + 172 ], - "sref": "#/page-elements/11", - "text-order": 11, + "sref": "#/page-elements/186", + "text-order": 186, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 493.4922180175781, - 164.66183471679688, - 545.3080444335938 + 46.49, + 616.51, + 242.98, + 628.07 ], - "iref": "#/texts/8", + "iref": "#/texts/149", + "name": "subtitle-level-1", + "orig-order": 185, + "page": 14, + "span": [ + 0, + 27 + ], + "sref": "#/page-elements/187", + "text-order": 187, + "type": "subtitle-level-1" + }, + { + "bbox": [ + 46.49, + 603.8, + 209.16, + 615.13 + ], + "iref": "#/texts/150", "name": "text", - "orig-order": 8, - "page": 1, + "orig-order": 186, + "page": 14, "span": [ 0, - 121 + 41 ], - "sref": "#/page-elements/12", - "text-order": 12, + "sref": "#/page-elements/188", + "text-order": 188, "type": "paragraph" }, { "bbox": [ - 209.1903839111328, - 552.2532348632812, - 249.1348114013672, - 561.7433471679688 + 45.65, + 577.84, + 84.4, + 589.02 ], - "iref": "#/texts/9", + "iref": "#/texts/151", "name": "subtitle-level-1", - "orig-order": 9, - "page": 1, + "orig-order": 187, + "page": 14, "span": [ 0, - 8 + 5 ], - "sref": "#/page-elements/13", - "text-order": 13, + "sref": "#/page-elements/189", + "text-order": 189, "type": "subtitle-level-1" }, { "bbox": [ - 208.6128387451172, - 251.58563232421875, - 543.8583984375, - 547.040771484375 + 45.72, + 539.07, + 288.84, + 576.0 ], - "iref": "#/texts/10", + "iref": "#/texts/152", "name": "text", - "orig-order": 10, - "page": 1, + "orig-order": 188, + "page": 14, "span": [ 0, - 1624 + 160 ], - "sref": "#/page-elements/14", - "text-order": 14, + "sref": "#/page-elements/190", + "text-order": 190, "type": "paragraph" }, { "bbox": [ - 209.21104431152344, - 228.2025146484375, - 269.01025390625, - 237.28173828125 + 45.98, + 512.62, + 110.58, + 524.07 ], - "iref": "#/texts/11", + "iref": "#/texts/153", "name": "subtitle-level-1", - "orig-order": 11, - "page": 1, + "orig-order": 189, + "page": 14, "span": [ 0, 8 ], - "sref": "#/page-elements/15", - "text-order": 15, + "sref": "#/page-elements/191", + "text-order": 191, "type": "subtitle-level-1" }, { "bbox": [ - 208.79600524902344, - 214.08453369140625, - 401.0297546386719, - 222.97467041015625 + 46.49, + 498.19, + 411.12, + 507.86 ], - "iref": "#/texts/12", - "name": "text", - "orig-order": 12, - "page": 1, + "iref": "#/texts/154", + "name": "list-item", + "orig-order": 190, + "page": 14, "span": [ 0, - 53 + 99 ], - "sref": "#/page-elements/16", - "text-order": 16, + "sref": "#/page-elements/192", + "text-order": 192, "type": "paragraph" }, { "bbox": [ - 44.27853012084961, - 187.51553344726562, - 189.71961975097656, - 199.65557861328125 + 46.17, + 472.41, + 552.9, + 493.87 ], - "iref": "#/texts/13", - "name": "subtitle-level-1", - "orig-order": 13, - "page": 1, + "iref": "#/texts/155", + "name": "list-item", + "orig-order": 191, + "page": 14, "span": [ 0, - 16 + 285 ], - "sref": "#/page-elements/17", - "text-order": 17, - "type": "subtitle-level-1" + "sref": "#/page-elements/193", + "text-order": 193, + "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 96.98406982421875, - 552.6513061523438, - 172.33074951171875 + 46.39, + 457.72, + 129.31, + 468.09 ], - "iref": "#/texts/14", - "name": "text", - "orig-order": 14, - "page": 1, + "iref": "#/texts/156", + "name": "list-item", + "orig-order": 192, + "page": 14, "span": [ 0, - 639 + 24 ], - "sref": "#/page-elements/18", - "text-order": 18, + "sref": "#/page-elements/194", + "text-order": 194, "type": "paragraph" }, { "bbox": [ - 44.787384033203125, - 52.49696731567383, - 540.7015991210938, - 70.33258056640625 + 45.71, + 443.15, + 242.07, + 453.05 ], - "iref": "#/footnotes/0", - "name": "footnote", - "orig-order": 19, - "page": 1, + "iref": "#/texts/157", + "name": "list-item", + "orig-order": 193, + "page": 14, "span": [ 0, - 201 + 53 ], - "sref": "#/page-elements/19", - "text-order": 19, - "type": "footnote" + "sref": "#/page-elements/195", + "text-order": 195, + "type": "paragraph" }, { "bbox": [ - 44.787384033203125, - 42.44549560546875, - 272.1662902832031, - 50.207763671875 + 46.02, + 417.42, + 554.64, + 438.91 ], - "iref": "#/footnotes/1", - "name": "footnote", - "orig-order": 20, - "page": 1, + "iref": "#/texts/158", + "name": "list-item", + "orig-order": 194, + "page": 14, "span": [ 0, - 75 + 248 ], - "sref": "#/page-elements/20", - "text-order": 20, - "type": "footnote" + "sref": "#/page-elements/196", + "text-order": 196, + "type": "paragraph" }, { "bbox": [ - 44.38350296020508, - 12.301444053649902, - 135.58876037597656, - 30.8690185546875 + 46.49, + 402.9, + 321.26, + 412.64 ], - "iref": "#/page-footers/0", - "name": "page-footer", - "orig-order": 21, - "page": 1, + "iref": "#/texts/159", + "name": "list-item", + "orig-order": 195, + "page": 14, "span": [ 0, - 64 + 70 ], - "sref": "#/page-elements/21", - "text-order": 21, - "type": "page-footer" + "sref": "#/page-elements/197", + "text-order": 197, + "type": "paragraph" }, { "bbox": [ - 400.53094482421875, - 22.279802322387695, - 550.6204223632812, - 29.6954345703125 + 46.0, + 376.94, + 554.38, + 398.06 ], - "iref": "#/page-footers/1", - "name": "page-footer", - "orig-order": 22, - "page": 1, + "iref": "#/texts/160", + "name": "list-item", + "orig-order": 196, + "page": 14, "span": [ 0, - 42 + 211 ], - "sref": "#/page-elements/22", - "text-order": 22, - "type": "page-footer" + "sref": "#/page-elements/198", + "text-order": 198, + "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + 46.06, + 350.92, + 553.26, + 372.03 ], - "iref": "#/texts/15", - "name": "text", - "orig-order": 40, - "page": 2, + "iref": "#/texts/161", + "name": "list-item", + "orig-order": 197, + "page": 14, "span": [ 0, - 5 + 156 ], - "sref": "#/page-elements/23", - "text-order": 23, + "sref": "#/page-elements/199", + "text-order": 199, "type": "paragraph" }, { "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9636840820312, - 758.332763671875 + 45.95, + 335.79, + 129.87, + 346.32 ], - "iref": "#/page-headers/4", - "name": "page-header", - "orig-order": 41, - "page": 2, + "iref": "#/texts/162", + "name": "list-item", + "orig-order": 198, + "page": 14, "span": [ 0, - 13 + 25 ], - "sref": "#/page-elements/24", - "text-order": 24, - "type": "page-header" + "sref": "#/page-elements/200", + "text-order": 200, + "type": "paragraph" }, { "bbox": [ - 45.97464370727539, - 604.0350952148438, - 554.3433227539062, - 732.5863037109375 + 45.83, + 321.95, + 234.11, + 331.86 ], - "iref": "#/texts/16", - "name": "text", - "orig-order": 23, - "page": 2, + "iref": "#/texts/163", + "name": "list-item", + "orig-order": 199, + "page": 14, "span": [ 0, - 1082 + 54 ], - "sref": "#/page-elements/25", - "text-order": 25, + "sref": "#/page-elements/201", + "text-order": 201, "type": "paragraph" }, { "bbox": [ - 46.485626220703125, - 513.0453491210938, - 553.2366943359375, - 601.0419921875 + 46.48, + 307.19, + 269.67, + 316.97 ], - "iref": "#/texts/17", - "name": "text", - "orig-order": 24, - "page": 2, + "iref": "#/texts/164", + "name": "list-item", + "orig-order": 200, + "page": 14, "span": [ 0, - 836 + 61 ], - "sref": "#/page-elements/26", - "text-order": 26, + "sref": "#/page-elements/202", + "text-order": 202, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 500.0622253417969, - 340.59906005859375, - 509.4723205566406 + 46.02, + 292.92, + 301.01, + 302.85 ], - "iref": "#/texts/18", - "name": "text", - "orig-order": 25, - "page": 2, + "iref": "#/texts/165", + "name": "list-item", + "orig-order": 201, + "page": 14, "span": [ 0, - 69 + 75 ], - "sref": "#/page-elements/27", - "text-order": 27, + "sref": "#/page-elements/203", + "text-order": 203, "type": "paragraph" }, { "bbox": [ - 57.86075973510742, - 487.0791015625, - 492.157958984375, - 496.63543701171875 + 46.44, + 278.17, + 187.93, + 288.11 ], - "iref": "#/texts/19", - "name": "text", - "orig-order": 26, - "page": 2, + "iref": "#/texts/166", + "name": "list-item", + "orig-order": 202, + "page": 14, "span": [ 0, - 101 + 43 ], - "sref": "#/page-elements/28", - "text-order": 28, + "sref": "#/page-elements/204", + "text-order": 204, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 461.0568542480469, - 262.5708312988281, - 470.5727233886719 + 46.01, + 263.8, + 169.37, + 274.13 ], - "iref": "#/texts/20", + "iref": "#/texts/167", "name": "list-item", - "orig-order": 27, - "page": 2, + "orig-order": 203, + "page": 14, "span": [ 0, - 49 + 36 ], - "sref": "#/page-elements/29", - "text-order": 29, + "sref": "#/page-elements/205", + "text-order": 205, "type": "paragraph" }, { "bbox": [ - 45.779930114746094, - 448.07373046875, - 241.75213623046875, - 457.51177978515625 + 46.05, + 231.93, + 123.27, + 244.55 ], - "iref": "#/texts/21", - "name": "list-item", - "orig-order": 28, - "page": 2, + "iref": "#/texts/168", + "name": "subtitle-level-1", + "orig-order": 204, + "page": 14, "span": [ 0, - 45 + 10 ], - "sref": "#/page-elements/30", - "text-order": 30, - "type": "paragraph" + "sref": "#/page-elements/206", + "text-order": 206, + "type": "subtitle-level-1" }, { "bbox": [ - 46.48820114135742, - 435.03460693359375, - 174.95623779296875, - 444.5535583496094 + 50.67, + 207.43, + 552.38, + 228.92 ], - "iref": "#/texts/22", + "iref": "#/texts/169", "name": "list-item", - "orig-order": 29, - "page": 2, + "orig-order": 205, + "page": 14, "span": [ 0, - 29 + 179 ], - "sref": "#/page-elements/31", - "text-order": 31, + "sref": "#/page-elements/207", + "text-order": 207, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 422.0514831542969, - 528.8121948242188, - 431.5508728027344 + 50.74, + 184.41, + 552.62, + 205.77 ], - "iref": "#/texts/23", + "iref": "#/texts/170", "name": "list-item", - "orig-order": 30, - "page": 2, + "orig-order": 206, + "page": 14, "span": [ 0, - 112 + 163 ], - "sref": "#/page-elements/32", - "text-order": 32, + "sref": "#/page-elements/208", + "text-order": 208, "type": "paragraph" }, { "bbox": [ - 45.387489318847656, - 409.068359375, - 446.47918701171875, - 418.8954772949219 + 50.74, + 161.39, + 552.68, + 182.65 ], - "iref": "#/texts/24", + "iref": "#/texts/171", "name": "list-item", - "orig-order": 31, - "page": 2, + "orig-order": 207, + "page": 14, "span": [ 0, - 94 + 168 ], - "sref": "#/page-elements/33", - "text-order": 33, + "sref": "#/page-elements/209", + "text-order": 209, "type": "paragraph" }, { "bbox": [ - 45.996150970458984, - 292.05224609375, - 553.0557861328125, - 392.69879150390625 + 50.17, + 126.92, + 552.57, + 159.62 ], - "iref": "#/texts/25", - "name": "text", - "orig-order": 32, - "page": 2, + "iref": "#/texts/172", + "name": "list-item", + "orig-order": 208, + "page": 14, "span": [ 0, - 869 + 292 ], - "sref": "#/page-elements/34", - "text-order": 34, + "sref": "#/page-elements/210", + "text-order": 210, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 265.89093017578125, - 551.4827270507812, - 288.8219299316406 + 50.49, + 103.9, + 553.58, + 124.9 ], - "iref": "#/texts/26", - "name": "text", - "orig-order": 33, - "page": 2, + "iref": "#/texts/173", + "name": "list-item", + "orig-order": 209, + "page": 14, "span": [ 0, - 140 + 171 ], - "sref": "#/page-elements/35", - "text-order": 35, + "sref": "#/page-elements/211", + "text-order": 211, "type": "paragraph" }, { "bbox": [ - 46.371070861816406, - 240.06375122070312, - 515.491943359375, - 249.5263671875 + 50.74, + 92.39, + 436.99, + 101.69 ], - "iref": "#/texts/27", + "iref": "#/texts/174", "name": "list-item", - "orig-order": 34, - "page": 2, + "orig-order": 210, + "page": 14, "span": [ 0, - 111 + 102 ], - "sref": "#/page-elements/36", - "text-order": 36, + "sref": "#/page-elements/212", + "text-order": 212, "type": "paragraph" }, { "bbox": [ - 46.48820114135742, - 214.04150390625, - 551.0504760742188, - 236.58538818359375 + 50.74, + 69.43, + 552.49, + 90.58 ], - "iref": "#/texts/28", + "iref": "#/texts/175", "name": "list-item", - "orig-order": 35, - "page": 2, + "orig-order": 211, + "page": 14, "span": [ 0, - 180 + 156 ], - "sref": "#/page-elements/37", - "text-order": 37, + "sref": "#/page-elements/213", + "text-order": 213, "type": "paragraph" }, { "bbox": [ - 45.20487594604492, - 201.05838012695312, - 376.7724914550781, - 210.76416015625 + 50.38, + 46.41, + 553.17, + 67.6 ], - "iref": "#/texts/29", + "iref": "#/texts/176", "name": "list-item", - "orig-order": 36, - "page": 2, + "orig-order": 212, + "page": 14, "span": [ 0, - 82 + 184 ], - "sref": "#/page-elements/38", - "text-order": 38, + "sref": "#/page-elements/214", + "text-order": 214, "type": "paragraph" }, { "bbox": [ - 46.2375373840332, - 110.07154846191406, - 553.1372680664062, - 184.7841796875 + 578.37, + 15.45, + 583.48, + 766.71 ], - "iref": "#/texts/30", + "iref": "#/texts/177", "name": "text", - "orig-order": 37, - "page": 2, + "orig-order": 215, + "page": 14, "span": [ 0, - 647 + 320 ], - "sref": "#/page-elements/39", - "text-order": 39, + "sref": "#/page-elements/215", + "text-order": 215, "type": "paragraph" }, { "bbox": [ - 46.487701416015625, - 84.04928588867188, - 550.5083618164062, - 107.71282958984375 + 44.47, + 751.46, + 84.89, + 758.81 ], - "iref": "#/texts/31", - "name": "text", - "orig-order": 38, - "page": 2, + "iref": "#/page-headers/18", + "name": "page-header", + "orig-order": 228, + "page": 15, "span": [ 0, - 202 + 13 ], - "sref": "#/page-elements/40", - "text-order": 40, - "type": "paragraph" + "sref": "#/page-elements/216", + "text-order": 216, + "type": "page-header" }, { "bbox": [ - 45.976261138916016, - 45.04500961303711, - 551.8382568359375, - 81.24627685546875 + 454.56, + 745.46, + 549.1, + 761.86 ], - "iref": "#/texts/32", - "name": "text", - "orig-order": 39, - "page": 2, + "iref": "#/figures/8", + "name": "picture", + "orig-order": 229, + "page": 15, "span": [ 0, - 346 + 0 ], - "sref": "#/page-elements/41", - "text-order": 41, - "type": "paragraph" + "sref": "#/page-elements/217", + "text-order": 217, + "type": "figure" }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 46.63, + 722.43, + 362.75, + 731.72 ], - "iref": "#/texts/33", - "name": "text", - "orig-order": 42, - "page": 2, + "iref": "#/texts/178", + "name": "list-item", + "orig-order": 216, + "page": 15, "span": [ 0, - 320 + 85 ], - "sref": "#/page-elements/42", - "text-order": 42, + "sref": "#/page-elements/218", + "text-order": 218, + "type": "paragraph" + }, + { + "bbox": [ + 44.79, + 699.52, + 549.75, + 720.41 + ], + "iref": "#/texts/179", + "name": "list-item", + "orig-order": 217, + "page": 15, + "span": [ + 0, + 168 + ], + "sref": "#/page-elements/219", + "text-order": 219, "type": "paragraph" }, { "bbox": [ - 44.50688552856445, - 751.4635620117188, - 85.01602935791016, - 758.0504760742188 + 44.79, + 688.01, + 238.67, + 697.14 ], - "iref": "#/page-headers/5", - "name": "page-header", - "orig-order": 50, - "page": 3, + "iref": "#/texts/180", + "name": "list-item", + "orig-order": 218, + "page": 15, "span": [ 0, - 13 + 50 ], - "sref": "#/page-elements/43", - "text-order": 43, - "type": "page-header" + "sref": "#/page-elements/220", + "text-order": 220, + "type": "paragraph" }, { "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + 44.55, + 676.5, + 243.04, + 685.7 ], - "iref": "#/texts/34", - "name": "text", - "orig-order": 51, - "page": 3, + "iref": "#/texts/181", + "name": "list-item", + "orig-order": 219, + "page": 15, "span": [ 0, - 5 + 52 ], - "sref": "#/page-elements/44", - "text-order": 44, + "sref": "#/page-elements/221", + "text-order": 221, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 695.0468139648438, - 549.4096069335938, - 730.4614868164062 + 44.79, + 653.54, + 548.76, + 674.38 ], - "iref": "#/texts/35", - "name": "text", - "orig-order": 43, - "page": 3, + "iref": "#/texts/182", + "name": "list-item", + "orig-order": 220, + "page": 15, "span": [ 0, - 262 + 145 ], - "sref": "#/page-elements/45", - "text-order": 45, + "sref": "#/page-elements/222", + "text-order": 222, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 655.5153198242188, - 378.15191650390625, - 666.9031982421875 + 44.79, + 630.52, + 548.83, + 651.58 ], - "iref": "#/texts/36", - "name": "subtitle-level-1", - "orig-order": 44, - "page": 3, + "iref": "#/texts/183", + "name": "list-item", + "orig-order": 221, + "page": 15, "span": [ 0, - 37 + 252 ], - "sref": "#/page-elements/46", - "text-order": 46, - "type": "subtitle-level-1" + "sref": "#/page-elements/223", + "text-order": 223, + "type": "paragraph" }, { "bbox": [ - 44.785400390625, - 552.0484008789062, - 549.7849731445312, - 639.5802001953125 + 44.79, + 607.51, + 550.84, + 628.08 ], - "iref": "#/texts/37", - "name": "text", - "orig-order": 45, - "page": 3, + "iref": "#/texts/184", + "name": "list-item", + "orig-order": 222, + "page": 15, "span": [ 0, - 796 + 147 ], - "sref": "#/page-elements/47", - "text-order": 47, + "sref": "#/page-elements/224", + "text-order": 224, "type": "paragraph" }, { "bbox": [ - 44.785430908203125, - 409.068603515625, - 554.4052124023438, - 548.475341796875 + 44.79, + 596.0, + 474.98, + 604.66 ], - "iref": "#/texts/38", - "name": "text", - "orig-order": 46, - "page": 3, + "iref": "#/texts/185", + "name": "list-item", + "orig-order": 223, + "page": 15, "span": [ 0, - 1141 + 114 ], - "sref": "#/page-elements/48", - "text-order": 48, + "sref": "#/page-elements/225", + "text-order": 225, "type": "paragraph" }, { "bbox": [ - 44.78739929199219, - 369.4996032714844, - 134.88641357421875, - 380.88751220703125 + 44.79, + 573.04, + 548.8, + 592.54 ], - "iref": "#/texts/39", - "name": "subtitle-level-1", - "orig-order": 47, - "page": 3, + "iref": "#/texts/186", + "name": "list-item", + "orig-order": 224, + "page": 15, "span": [ 0, - 14 + 197 ], - "sref": "#/page-elements/49", - "text-order": 49, - "type": "subtitle-level-1" + "sref": "#/page-elements/226", + "text-order": 226, + "type": "paragraph" }, { "bbox": [ - 44.524391174316406, - 317.6519470214844, - 552.3914184570312, - 353.5248107910156 + 44.79, + 550.02, + 548.72, + 569.83 ], - "iref": "#/texts/40", - "name": "text", - "orig-order": 48, - "page": 3, + "iref": "#/texts/187", + "name": "list-item", + "orig-order": 225, + "page": 15, "span": [ 0, - 232 + 142 ], - "sref": "#/page-elements/50", - "text-order": 50, + "sref": "#/page-elements/227", + "text-order": 227, "type": "paragraph" }, { "bbox": [ - 78.5494384765625, - 102.71893310546875, - 512.3916625976562, - 284.9899597167969 + 44.79, + 527.0, + 550.57, + 546.75 ], - "iref": "#/figures/0", - "name": "picture", - "orig-order": 53, - "page": 3, + "iref": "#/texts/188", + "name": "list-item", + "orig-order": 226, + "page": 15, "span": [ 0, - 0 + 176 ], - "sref": "#/page-elements/51", - "text-order": 51, - "type": "figure" + "sref": "#/page-elements/228", + "text-order": 228, + "type": "paragraph" }, { "bbox": [ - 44.78328323364258, - 45.39774703979492, - 545.7940673828125, - 89.4708251953125 + 57.16, + 468.54, + 529.74, + 491.14 ], - "iref": "#/figures/0/captions/0", - "name": "caption", - "orig-order": 49, - "page": 3, + "iref": "#/texts/189", + "name": "text", + "orig-order": 227, + "page": 15, "span": [ 0, - 498 + 216 ], - "sref": "#/page-elements/52", - "text-order": 52, - "type": "caption" + "sref": "#/page-elements/229", + "text-order": 229, + "type": "paragraph" }, { "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + 578.37, + 15.45, + 583.48, + 766.71 ], - "iref": "#/texts/41", + "iref": "#/texts/190", "name": "text", - "orig-order": 52, - "page": 3, + "orig-order": 230, + "page": 15, "span": [ 0, 320 ], - "sref": "#/page-elements/53", - "text-order": 53, + "sref": "#/page-elements/230", + "text-order": 230, "type": "paragraph" + } + ], + "page-footers": [ + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/0", + "orig": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", + "prov": [ + { + "$ref": "#/page-elements/21" + } + ], + "sref": "#/page-footers/0", + "subj_hash": 12400883656433726216, + "text": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", + "text_hash": 8372141692634509619, + "type": "page-footer" }, { - "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/1", + "orig": "wileyonlinelibrary.com/journal/ail2 1of15", + "prov": [ + { + "$ref": "#/page-elements/22" + } ], - "iref": "#/texts/42", - "name": "text", - "orig-order": 63, - "page": 4, - "span": [ - 0, - 5 + "sref": "#/page-footers/1", + "subj_hash": 10244115652970867690, + "text": "wileyonlinelibrary.com/journal/ail2 1of15", + "text_hash": 6196517219334265105, + "type": "page-footer" + } + ], + "page-headers": [ + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/0", + "orig": "Received: 15 September 2020", + "prov": [ + { + "$ref": "#/page-elements/0" + } ], - "sref": "#/page-elements/54", - "text-order": 54, - "type": "paragraph" + "sref": "#/page-headers/0", + "subj_hash": 1841431076736563689, + "text": "Received: 15 September 2020", + "text_hash": 16688788223092401940, + "type": "page-header" }, { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9420166015625, - 758.4869384765625 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/1", + "orig": "Revised: 23 November 2020", + "prov": [ + { + "$ref": "#/page-elements/1" + } ], - "iref": "#/page-headers/6", - "name": "page-header", - "orig-order": 64, - "page": 4, - "span": [ - 0, - 13 + "sref": "#/page-headers/1", + "subj_hash": 3915126318503464014, + "text": "Revised: 23 November 2020", + "text_hash": 1000711515083668085, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/2", + "orig": "Accepted: 25 November 2020", + "prov": [ + { + "$ref": "#/page-elements/2" + } ], - "sref": "#/page-elements/55", - "text-order": 55, + "sref": "#/page-headers/2", + "subj_hash": 1727876228376027809, + "text": "Accepted: 25 November 2020", + "text_hash": 17099649843681009628, "type": "page-header" }, { - "bbox": [ - 45.14111328125, - 720.4854736328125, - 157.7607421875, - 732.3443603515625 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/3", + "orig": "DOI: 10.1002/ail2.20", + "prov": [ + { + "$ref": "#/page-elements/3" + } ], - "iref": "#/texts/43", - "name": "subtitle-level-1", - "orig-order": 54, - "page": 4, - "span": [ - 0, - 18 + "sref": "#/page-headers/3", + "subj_hash": 4558221577189246496, + "text": "DOI: 10.1002/ail2.20", + "text_hash": 348625343742526555, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/4", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/24" + } ], - "sref": "#/page-elements/56", - "text-order": 56, - "type": "subtitle-level-1" + "sref": "#/page-headers/4", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" }, { - "bbox": [ - 46.48820114135742, - 656.0805053710938, - 553.5469360351562, - 704.7728881835938 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/5", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/43" + } ], - "iref": "#/texts/44", - "name": "text", - "orig-order": 55, - "page": 4, - "span": [ - 0, - 403 + "sref": "#/page-headers/5", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/6", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/55" + } ], - "sref": "#/page-elements/57", - "text-order": 57, - "type": "paragraph" + "sref": "#/page-headers/6", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" }, { - "bbox": [ - 45.56229019165039, - 604.0359497070312, - 553.0910034179688, - 652.8948974609375 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/7", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/66" + } ], - "iref": "#/texts/45", - "name": "text", - "orig-order": 56, - "page": 4, - "span": [ - 0, - 417 + "sref": "#/page-headers/7", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/8", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/76" + } ], - "sref": "#/page-elements/58", - "text-order": 58, - "type": "paragraph" + "sref": "#/page-headers/8", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" }, { - "bbox": [ - 45.6591796875, - 565.0864868164062, - 552.8568115234375, - 600.9397583007812 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/9", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/92" + } ], - "iref": "#/texts/46", - "name": "text", - "orig-order": 57, - "page": 4, - "span": [ - 0, - 282 + "sref": "#/page-headers/9", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/10", + "orig": "8of15", + "prov": [ + { + "$ref": "#/page-elements/106" + } ], - "sref": "#/page-elements/59", - "text-order": 59, - "type": "paragraph" + "sref": "#/page-headers/10", + "subj_hash": 4361549266732238272, + "text": "8of15", + "text_hash": 329104147727696635, + "type": "page-header" }, { - "bbox": [ - 45.497798919677734, - 525.5185546875, - 161.91403198242188, - 536.9064331054688 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/11", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/107" + } ], - "iref": "#/texts/47", - "name": "subtitle-level-1", - "orig-order": 58, - "page": 4, - "span": [ - 0, - 18 + "sref": "#/page-headers/11", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/12", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/117" + } ], - "sref": "#/page-elements/60", - "text-order": 60, - "type": "subtitle-level-1" + "sref": "#/page-headers/12", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" }, { - "bbox": [ - 46.28074645996094, - 435.03485107421875, - 552.7772827148438, - 509.80706787109375 + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/13", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/131" + } ], - "iref": "#/texts/48", - "name": "text", - "orig-order": 59, - "page": 4, - "span": [ - 0, - 647 + "sref": "#/page-headers/13", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/14", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/145" + } + ], + "sref": "#/page-headers/14", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/15", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/156" + } + ], + "sref": "#/page-headers/15", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/16", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/170" + } + ], + "sref": "#/page-headers/16", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/17", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/183" + } + ], + "sref": "#/page-headers/17", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + }, + { + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/18", + "orig": "STAAR ET AL.", + "prov": [ + { + "$ref": "#/page-elements/216" + } + ], + "sref": "#/page-headers/18", + "subj_hash": 8492015887072434396, + "text": "STAAR ET AL.", + "text_hash": 14658966106383255015, + "type": "page-header" + } + ], + "properties": { + "data": [ + [ + "language", + 13357303559203493643, + "DOCUMENT", + "#", + "en", + 0.99 + ], + [ + "language", + 2144509362215609527, + "TEXT", + "#/texts/0", + "en", + 0.4 + ], + [ + "semantic", + 2144509362215609527, + "TEXT", + "#/texts/0", + "meta-data", + 0.69 + ], + [ + "language", + 16672720454366774824, + "TEXT", + "#/texts/1", + "en", + 0.75 + ], + [ + "semantic", + 16672720454366774824, + "TEXT", + "#/texts/1", + "text", + 0.52 + ], + [ + "language", + 16781763356419781679, + "TEXT", + "#/texts/2", + "nl", + 0.45 + ], + [ + "semantic", + 16781763356419781679, + "TEXT", + "#/texts/2", + "meta-data", + 1.0 + ], + [ + "language", + 3352447812305581329, + "TEXT", + "#/texts/3", + "ceb", + 0.48 + ], + [ + "semantic", + 3352447812305581329, + "TEXT", + "#/texts/3", + "text", + 0.95 + ], + [ + "language", + 14877831450145300436, + "TEXT", + "#/texts/4", + "it", + 0.35 + ], + [ + "semantic", + 14877831450145300436, + "TEXT", + "#/texts/4", + "meta-data", + 1.0 + ], + [ + "language", + 3352447812305581329, + "TEXT", + "#/texts/5", + "ceb", + 0.48 + ], + [ + "semantic", + 3352447812305581329, + "TEXT", + "#/texts/5", + "text", + 0.95 + ], + [ + "language", + 13336841394978214677, + "TEXT", + "#/texts/6", + "de", + 0.56 + ], + [ + "semantic", + 13336841394978214677, + "TEXT", + "#/texts/6", + "meta-data", + 0.97 + ], + [ + "language", + 15325526562897377208, + "TEXT", + "#/texts/7", + "en", + 0.8 + ], + [ + "semantic", + 15325526562897377208, + "TEXT", + "#/texts/7", + "meta-data", + 0.71 ], - "sref": "#/page-elements/61", - "text-order": 61, - "type": "paragraph" - }, - { - "bbox": [ - 45.999271392822266, - 370.0654296875, - 551.750244140625, - 431.6009521484375 + [ + "language", + 4017434568255781081, + "TEXT", + "#/texts/8", + "en", + 0.33 ], - "iref": "#/texts/49", - "name": "text", - "orig-order": 60, - "page": 4, - "span": [ - 0, - 542 + [ + "semantic", + 4017434568255781081, + "TEXT", + "#/texts/8", + "meta-data", + 0.66 ], - "sref": "#/page-elements/62", - "text-order": 62, - "type": "paragraph" - }, - { - "bbox": [ - 46.37678527832031, - 304.9195251464844, - 551.427001953125, - 366.6332092285156 + [ + "language", + 8487024695951375934, + "TEXT", + "#/texts/9", + "en", + 0.32 ], - "iref": "#/texts/50", - "name": "text", - "orig-order": 61, - "page": 4, - "span": [ - 0, - 580 + [ + "semantic", + 8487024695951375934, + "TEXT", + "#/texts/9", + "header", + 0.93 ], - "sref": "#/page-elements/63", - "text-order": 63, - "type": "paragraph" - }, - { - "bbox": [ - 46.48663330078125, - 45.39759826660156, - 540.3204956054688, - 67.21272277832031 + [ + "language", + 11695737263227886476, + "TEXT", + "#/texts/10", + "en", + 0.93 ], - "iref": "#/texts/51", - "name": "text", - "orig-order": 62, - "page": 4, - "span": [ - 0, - 220 + [ + "semantic", + 11695737263227886476, + "TEXT", + "#/texts/10", + "text", + 0.97 ], - "sref": "#/page-elements/64", - "text-order": 64, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 8500733160758672230, + "TEXT", + "#/texts/11", + "es", + 0.36 ], - "iref": "#/texts/52", - "name": "text", - "orig-order": 65, - "page": 4, - "span": [ - 0, - 320 + [ + "semantic", + 8500733160758672230, + "TEXT", + "#/texts/11", + "header", + 0.93 ], - "sref": "#/page-elements/65", - "text-order": 65, - "type": "paragraph" - }, - { - "bbox": [ - 44.041500091552734, - 751.3096313476562, - 85.72028350830078, - 759.7291870117188 + [ + "language", + 4452030907228745864, + "TEXT", + "#/texts/12", + "en", + 0.62 ], - "iref": "#/page-headers/7", - "name": "page-header", - "orig-order": 72, - "page": 5, - "span": [ - 0, - 13 + [ + "semantic", + 4452030907228745864, + "TEXT", + "#/texts/12", + "reference", + 0.54 ], - "sref": "#/page-elements/66", - "text-order": 66, - "type": "page-header" - }, - { - "bbox": [ - 454.1357421875, - 745.7154541015625, - 550.62109375, - 761.0070190429688 + [ + "language", + 11913688961435238004, + "TEXT", + "#/texts/13", + "en", + 0.64 ], - "iref": "#/figures/1", - "name": "picture", - "orig-order": 73, - "page": 5, - "span": [ - 0, - 0 + [ + "semantic", + 11913688961435238004, + "TEXT", + "#/texts/13", + "header", + 0.99 ], - "sref": "#/page-elements/67", - "text-order": 67, - "type": "figure" - }, - { - "bbox": [ - 44.78594970703125, - 483.39947509765625, - 548.2582397460938, - 529.3165283203125 + [ + "language", + 9977041563469582014, + "TEXT", + "#/texts/14", + "en", + 0.96 ], - "iref": "#/texts/53", - "name": "text", - "orig-order": 71, - "page": 5, - "span": [ - 0, - 421 + [ + "semantic", + 9977041563469582014, + "TEXT", + "#/texts/14", + "text", + 0.99 ], - "sref": "#/page-elements/68", - "text-order": 68, - "type": "paragraph" - }, - { - "bbox": [ - 44.78684997558594, - 370.0640563964844, - 549.865478515625, - 444.5719299316406 + [ + "language", + 4361549266817300114, + "TEXT", + "#/texts/15", + "en", + 0.18 ], - "iref": "#/texts/54", - "name": "text", - "orig-order": 66, - "page": 5, - "span": [ - 0, - 687 + [ + "semantic", + 4361549266817300114, + "TEXT", + "#/texts/15", + "meta-data", + 0.75 ], - "sref": "#/page-elements/69", - "text-order": 69, - "type": "paragraph" - }, - { - "bbox": [ - 44.206939697265625, - 330.4949035644531, - 223.93128967285156, - 341.8828125 + [ + "language", + 8425126282903547933, + "TEXT", + "#/texts/16", + "en", + 0.93 ], - "iref": "#/texts/55", - "name": "subtitle-level-1", - "orig-order": 67, - "page": 5, - "span": [ - 0, - 31 + [ + "semantic", + 8425126282903547933, + "TEXT", + "#/texts/16", + "text", + 1.0 ], - "sref": "#/page-elements/70", - "text-order": 70, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78684616088867, - 149.07435607910156, - 549.819091796875, - 314.53570556640625 + [ + "language", + 16507313240019459642, + "TEXT", + "#/texts/17", + "en", + 0.9 ], - "iref": "#/texts/56", - "name": "text", - "orig-order": 68, - "page": 5, - "span": [ - 0, - 1517 + [ + "semantic", + 16507313240019459642, + "TEXT", + "#/texts/17", + "text", + 1.0 ], - "sref": "#/page-elements/71", - "text-order": 71, - "type": "paragraph" - }, - { - "bbox": [ - 43.94790267944336, - 109.50601959228516, - 254.47779846191406, - 120.89392852783203 + [ + "language", + 7900229969942228522, + "TEXT", + "#/texts/18", + "en", + 0.98 ], - "iref": "#/texts/57", - "name": "subtitle-level-1", - "orig-order": 69, - "page": 5, - "span": [ - 0, - 36 + [ + "semantic", + 7900229969942228522, + "TEXT", + "#/texts/18", + "text", + 1.0 ], - "sref": "#/page-elements/72", - "text-order": 72, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 45.00958251953125, - 549.1444091796875, - 93.61456298828125 + [ + "language", + 10081303962589804251, + "TEXT", + "#/texts/19", + "en", + 0.92 ], - "iref": "#/texts/58", - "name": "text", - "orig-order": 70, - "page": 5, - "span": [ - 0, - 384 + [ + "semantic", + 10081303962589804251, + "TEXT", + "#/texts/19", + "text", + 1.0 ], - "sref": "#/page-elements/73", - "text-order": 73, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 12186698460099365002, + "TEXT", + "#/texts/20", + "en", + 0.51 ], - "iref": "#/texts/59", - "name": "text", - "orig-order": 74, - "page": 5, - "span": [ - 0, - 320 + [ + "semantic", + 12186698460099365002, + "TEXT", + "#/texts/20", + "text", + 0.62 ], - "sref": "#/page-elements/74", - "text-order": 74, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 751.4075317382812, - 68.55958557128906, - 758.0504760742188 + [ + "language", + 14190244699299580163, + "TEXT", + "#/texts/21", + "en", + 0.63 ], - "iref": "#/texts/60", - "name": "text", - "orig-order": 89, - "page": 6, - "span": [ - 0, - 5 + [ + "semantic", + 14190244699299580163, + "TEXT", + "#/texts/21", + "reference", + 0.96 ], - "sref": "#/page-elements/75", - "text-order": 75, - "type": "paragraph" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.9879150390625, - 758.9756469726562 + [ + "language", + 1376279050886549305, + "TEXT", + "#/texts/22", + "en", + 0.46 ], - "iref": "#/page-headers/8", - "name": "page-header", - "orig-order": 90, - "page": 6, - "span": [ - 0, - 13 + [ + "semantic", + 1376279050886549305, + "TEXT", + "#/texts/22", + "reference", + 0.5 ], - "sref": "#/page-elements/76", - "text-order": 76, - "type": "page-header" - }, - { - "bbox": [ - 45.78483581542969, - 669.0628051757812, - 554.4027709960938, - 730.823486328125 + [ + "language", + 10155628801693924200, + "TEXT", + "#/texts/23", + "en", + 0.93 ], - "iref": "#/texts/61", - "name": "text", - "orig-order": 75, - "page": 6, - "span": [ - 0, - 564 + [ + "semantic", + 10155628801693924200, + "TEXT", + "#/texts/23", + "text", + 0.78 ], - "sref": "#/page-elements/77", - "text-order": 77, - "type": "paragraph" - }, - { - "bbox": [ - 45.753639221191406, - 629.4933471679688, - 148.00445556640625, - 641.5734252929688 + [ + "language", + 9107499507097280105, + "TEXT", + "#/texts/24", + "en", + 0.93 ], - "iref": "#/texts/62", - "name": "subtitle-level-1", - "orig-order": 76, - "page": 6, - "span": [ - 0, - 16 + [ + "semantic", + 9107499507097280105, + "TEXT", + "#/texts/24", + "header", + 0.43 ], - "sref": "#/page-elements/78", - "text-order": 78, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 591.0541381835938, - 552.9049682617188, - 613.8143310546875 + [ + "language", + 7248467870339433322, + "TEXT", + "#/texts/25", + "en", + 0.93 ], - "iref": "#/texts/63", - "name": "text", - "orig-order": 77, - "page": 6, - "span": [ - 0, - 225 + [ + "semantic", + 7248467870339433322, + "TEXT", + "#/texts/25", + "text", + 0.99 ], - "sref": "#/page-elements/79", - "text-order": 79, - "type": "paragraph" - }, - { - "bbox": [ - 46.445133209228516, - 552.0497436523438, - 553.362548828125, - 575.2869873046875 + [ + "language", + 13346892078888080449, + "TEXT", + "#/texts/26", + "en", + 0.89 ], - "iref": "#/texts/64", - "name": "list-item", - "orig-order": 78, - "page": 6, - "span": [ - 0, - 179 + [ + "semantic", + 13346892078888080449, + "TEXT", + "#/texts/26", + "text", + 0.99 ], - "sref": "#/page-elements/80", - "text-order": 80, - "type": "paragraph" - }, - { - "bbox": [ - 45.744380950927734, - 526.0834350585938, - 553.5414428710938, - 548.8994140625 + [ + "language", + 1118972765223422660, + "TEXT", + "#/texts/27", + "en", + 0.91 ], - "iref": "#/texts/65", - "name": "list-item", - "orig-order": 79, - "page": 6, - "span": [ - 0, - 133 + [ + "semantic", + 1118972765223422660, + "TEXT", + "#/texts/27", + "text", + 1.0 ], - "sref": "#/page-elements/81", - "text-order": 81, - "type": "paragraph" - }, - { - "bbox": [ - 44.8809700012207, - 513.0443115234375, - 481.36083984375, - 523.5081787109375 + [ + "language", + 324023167304456371, + "TEXT", + "#/texts/28", + "en", + 0.92 ], - "iref": "#/texts/66", - "name": "list-item", - "orig-order": 80, - "page": 6, - "span": [ - 0, - 101 + [ + "semantic", + 324023167304456371, + "TEXT", + "#/texts/28", + "text", + 0.99 ], - "sref": "#/page-elements/82", - "text-order": 82, - "type": "paragraph" - }, - { - "bbox": [ - 46.38796615600586, - 435.0345458984375, - 553.393310546875, - 497.0226135253906 + [ + "language", + 4651508276868765576, + "TEXT", + "#/texts/29", + "en", + 0.73 ], - "iref": "#/texts/67", - "name": "text", - "orig-order": 81, - "page": 6, - "span": [ - 0, - 525 + [ + "semantic", + 4651508276868765576, + "TEXT", + "#/texts/29", + "text", + 1.0 ], - "sref": "#/page-elements/83", - "text-order": 83, - "type": "paragraph" - }, - { - "bbox": [ - 45.54835891723633, - 344.0406799316406, - 555.0050048828125, - 432.1236877441406 + [ + "language", + 3052020526349962744, + "TEXT", + "#/texts/30", + "en", + 0.93 ], - "iref": "#/texts/68", - "name": "text", - "orig-order": 82, - "page": 6, - "span": [ - 0, - 693 + [ + "semantic", + 3052020526349962744, + "TEXT", + "#/texts/30", + "text", + 0.99 ], - "sref": "#/page-elements/84", - "text-order": 84, - "type": "paragraph" - }, - { - "bbox": [ - 46.25617980957031, - 304.472900390625, - 469.55108642578125, - 315.8608093261719 + [ + "language", + 6725501529910185390, + "TEXT", + "#/texts/31", + "en", + 0.97 ], - "iref": "#/texts/69", - "name": "subtitle-level-1", - "orig-order": 83, - "page": 6, - "span": [ - 0, - 48 + [ + "semantic", + 6725501529910185390, + "TEXT", + "#/texts/31", + "text", + 0.99 ], - "sref": "#/page-elements/85", - "text-order": 85, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 265.92974853515625, - 552.6448364257812, - 288.6134338378906 + [ + "language", + 14814111183601762276, + "TEXT", + "#/texts/32", + "en", + 0.9 + ], + [ + "semantic", + 14814111183601762276, + "TEXT", + "#/texts/32", + "text", + 1.0 ], - "iref": "#/texts/70", - "name": "text", - "orig-order": 84, - "page": 6, - "span": [ - 0, - 166 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/33", + "en", + 0.78 ], - "sref": "#/page-elements/86", - "text-order": 86, - "type": "paragraph" - }, - { - "bbox": [ - 46.377140045166016, - 240.049560546875, - 429.5157165527344, - 249.76214599609375 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/33", + "text", + 0.52 ], - "iref": "#/texts/71", - "name": "list-item", - "orig-order": 85, - "page": 6, - "span": [ - 0, - 92 + [ + "language", + 4361549266681704196, + "TEXT", + "#/texts/34", + "en", + 0.39 ], - "sref": "#/page-elements/87", - "text-order": 87, - "type": "paragraph" - }, - { - "bbox": [ - 45.62164306640625, - 227.0850830078125, - 346.3638916015625, - 237.665283203125 + [ + "semantic", + 4361549266681704196, + "TEXT", + "#/texts/34", + "meta-data", + 0.86 ], - "iref": "#/texts/72", - "name": "list-item", - "orig-order": 86, - "page": 6, - "span": [ - 0, - 73 + [ + "language", + 8043608144162608258, + "TEXT", + "#/texts/35", + "en", + 0.94 ], - "sref": "#/page-elements/88", - "text-order": 88, - "type": "paragraph" - }, - { - "bbox": [ - 45.322208404541016, - 162.0574493408203, - 553.8873901367188, - 210.65191650390625 + [ + "semantic", + 8043608144162608258, + "TEXT", + "#/texts/35", + "text", + 1.0 ], - "iref": "#/texts/73", - "name": "text", - "orig-order": 87, - "page": 6, - "span": [ - 0, - 472 + [ + "language", + 7159467829896778939, + "TEXT", + "#/texts/36", + "en", + 0.44 ], - "sref": "#/page-elements/89", - "text-order": 89, - "type": "paragraph" - }, - { - "bbox": [ - 45.762847900390625, - 71.06684875488281, - 554.2275390625, - 158.80230712890625 + [ + "semantic", + 7159467829896778939, + "TEXT", + "#/texts/36", + "header", + 0.75 ], - "iref": "#/texts/74", - "name": "text", - "orig-order": 88, - "page": 6, - "span": [ - 0, - 761 + [ + "language", + 5617240156952377, + "TEXT", + "#/texts/37", + "en", + 0.93 ], - "sref": "#/page-elements/90", - "text-order": 90, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 5617240156952377, + "TEXT", + "#/texts/37", + "text", + 1.0 ], - "iref": "#/texts/75", - "name": "text", - "orig-order": 91, - "page": 6, - "span": [ - 0, - 320 + [ + "language", + 3276490574487379366, + "TEXT", + "#/texts/38", + "en", + 0.84 ], - "sref": "#/page-elements/91", - "text-order": 91, - "type": "paragraph" - }, - { - "bbox": [ - 44.35243225097656, - 751.4635620117188, - 85.42164611816406, - 758.9300537109375 + [ + "semantic", + 3276490574487379366, + "TEXT", + "#/texts/38", + "text", + 0.99 ], - "iref": "#/page-headers/9", - "name": "page-header", - "orig-order": 103, - "page": 7, - "span": [ - 0, - 13 + [ + "language", + 3367451956962330174, + "TEXT", + "#/texts/39", + "en", + 0.94 ], - "sref": "#/page-elements/92", - "text-order": 92, - "type": "page-header" - }, - { - "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + [ + "semantic", + 3367451956962330174, + "TEXT", + "#/texts/39", + "header", + 1.0 ], - "iref": "#/texts/76", - "name": "text", - "orig-order": 104, - "page": 7, - "span": [ - 0, - 5 + [ + "language", + 5509744459704235873, + "TEXT", + "#/texts/40", + "en", + 0.87 ], - "sref": "#/page-elements/93", - "text-order": 93, - "type": "paragraph" - }, - { - "bbox": [ - 44.78684997558594, - 695.0850830078125, - 549.5508422851562, - 730.6725463867188 + [ + "semantic", + 5509744459704235873, + "TEXT", + "#/texts/40", + "text", + 1.0 ], - "iref": "#/texts/77", - "name": "text", - "orig-order": 92, - "page": 7, - "span": [ - 0, - 324 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/41", + "en", + 0.78 ], - "sref": "#/page-elements/94", - "text-order": 94, - "type": "paragraph" - }, - { - "bbox": [ - 44.71910095214844, - 655.5153198242188, - 236.7943572998047, - 666.9031982421875 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/41", + "text", + 0.52 ], - "iref": "#/texts/78", - "name": "subtitle-level-1", - "orig-order": 93, - "page": 7, - "span": [ - 0, - 32 + [ + "language", + 4361549176688508574, + "TEXT", + "#/texts/42", + "en", + 0.16 ], - "sref": "#/page-elements/95", - "text-order": 95, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78636169433594, - 578.0709838867188, - 549.254638671875, - 640.1705932617188 + [ + "semantic", + 4361549176688508574, + "TEXT", + "#/texts/42", + "meta-data", + 0.76 ], - "iref": "#/texts/79", - "name": "text", - "orig-order": 94, - "page": 7, - "span": [ - 0, - 502 + [ + "language", + 12374482891052873875, + "TEXT", + "#/texts/43", + "en", + 0.54 ], - "sref": "#/page-elements/96", - "text-order": 96, - "type": "paragraph" - }, - { - "bbox": [ - 44.733577728271484, - 539.0667114257812, - 548.8603515625, - 576.5675048828125 + [ + "semantic", + 12374482891052873875, + "TEXT", + "#/texts/43", + "header", + 0.99 ], - "iref": "#/texts/80", - "name": "text", - "orig-order": 95, - "page": 7, - "span": [ - 0, - 324 + [ + "language", + 2755397864153233778, + "TEXT", + "#/texts/44", + "en", + 0.9 ], - "sref": "#/page-elements/97", - "text-order": 97, - "type": "paragraph" - }, - { - "bbox": [ - 214.75270080566406, - 498.5877685546875, - 548.7813110351562, - 529.3681030273438 + [ + "semantic", + 2755397864153233778, + "TEXT", + "#/texts/44", + "text", + 1.0 ], - "iref": "#/texts/81", - "name": "formula", - "orig-order": 96, - "page": 7, - "span": [ - 0, - 92 + [ + "language", + 4698316471746130896, + "TEXT", + "#/texts/45", + "en", + 0.91 ], - "sref": "#/page-elements/98", - "text-order": 98, - "type": "equation" - }, - { - "bbox": [ - 44.784271240234375, - 435.0351257324219, - 548.7523193359375, - 470.5306396484375 + [ + "semantic", + 4698316471746130896, + "TEXT", + "#/texts/45", + "text", + 1.0 ], - "iref": "#/texts/82", - "name": "text", - "orig-order": 97, - "page": 7, - "span": [ - 0, - 327 + [ + "language", + 11827267218358801841, + "TEXT", + "#/texts/46", + "en", + 0.92 ], - "sref": "#/page-elements/99", - "text-order": 99, - "type": "paragraph" - }, - { - "bbox": [ - 234.89254760742188, - 399.494873046875, - 549.147216796875, - 425.90399169921875 + [ + "semantic", + 11827267218358801841, + "TEXT", + "#/texts/46", + "text", + 1.0 ], - "iref": "#/texts/83", - "name": "formula", - "orig-order": 98, - "page": 7, - "span": [ - 0, - 114 + [ + "language", + 6297710299044869343, + "TEXT", + "#/texts/47", + "fr", + 0.28 ], - "sref": "#/page-elements/100", - "text-order": 100, - "type": "equation" - }, - { - "bbox": [ - 44.786224365234375, - 279.0730285644531, - 549.0149536132812, - 379.8307189941406 + [ + "semantic", + 6297710299044869343, + "TEXT", + "#/texts/47", + "header", + 1.0 ], - "iref": "#/texts/84", - "name": "text", - "orig-order": 99, - "page": 7, - "span": [ - 0, - 960 + [ + "language", + 7158837349769150986, + "TEXT", + "#/texts/48", + "en", + 0.88 ], - "sref": "#/page-elements/101", - "text-order": 101, - "type": "paragraph" - }, - { - "bbox": [ - 44.786224365234375, - 253.05079650878906, - 549.2977294921875, - 275.7553405761719 + [ + "semantic", + 7158837349769150986, + "TEXT", + "#/texts/48", + "text", + 1.0 ], - "iref": "#/texts/85", - "name": "text", - "orig-order": 100, - "page": 7, - "span": [ - 0, - 204 + [ + "language", + 1150871476689677866, + "TEXT", + "#/texts/49", + "en", + 0.93 ], - "sref": "#/page-elements/102", - "text-order": 102, - "type": "paragraph" - }, - { - "bbox": [ - 43.776466369628906, - 213.4808349609375, - 380.18682861328125, - 224.8687286376953 + [ + "semantic", + 1150871476689677866, + "TEXT", + "#/texts/49", + "text", + 1.0 ], - "iref": "#/texts/86", - "name": "subtitle-level-1", - "orig-order": 101, - "page": 7, - "span": [ - 0, - 54 + [ + "language", + 5163702913945903725, + "TEXT", + "#/texts/50", + "en", + 0.96 ], - "sref": "#/page-elements/103", - "text-order": 103, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 58.08219528198242, - 550.3234252929688, - 197.4915771484375 + [ + "semantic", + 5163702913945903725, + "TEXT", + "#/texts/50", + "text", + 1.0 ], - "iref": "#/texts/87", - "name": "text", - "orig-order": 102, - "page": 7, - "span": [ - 0, - 1216 + [ + "language", + 5462319091745771382, + "TEXT", + "#/texts/51", + "en", + 0.9 ], - "sref": "#/page-elements/104", - "text-order": 104, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 5462319091745771382, + "TEXT", + "#/texts/51", + "text", + 0.83 ], - "iref": "#/texts/88", - "name": "text", - "orig-order": 105, - "page": 7, - "span": [ - 0, - 320 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/52", + "en", + 0.78 ], - "sref": "#/page-elements/105", - "text-order": 105, - "type": "paragraph" - }, - { - "bbox": [ - 45.74378967285156, - 751.4075317382812, - 68.55958557128906, - 758.9868774414062 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/52", + "text", + 0.52 ], - "iref": "#/page-headers/10", - "name": "page-header", - "orig-order": 113, - "page": 8, - "span": [ - 0, - 6 + [ + "language", + 958124839653591304, + "TEXT", + "#/texts/53", + "en", + 0.93 ], - "sref": "#/page-elements/106", - "text-order": 106, - "type": "page-header" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.921142578125, - 758.3907470703125 + [ + "semantic", + 958124839653591304, + "TEXT", + "#/texts/53", + "text", + 0.98 ], - "iref": "#/page-headers/11", - "name": "page-header", - "orig-order": 114, - "page": 8, - "span": [ - 0, - 13 + [ + "language", + 1448405324616602032, + "TEXT", + "#/texts/54", + "en", + 0.86 ], - "sref": "#/page-elements/107", - "text-order": 107, - "type": "page-header" - }, - { - "bbox": [ - 96.34707641601562, - 537.8071899414062, - 496.8702697753906, - 731.7752075195312 + [ + "semantic", + 1448405324616602032, + "TEXT", + "#/texts/54", + "text", + 0.99 ], - "iref": "#/figures/2", - "name": "picture", - "orig-order": 116, - "page": 8, - "span": [ - 0, - 0 + [ + "language", + 2617775076168299948, + "TEXT", + "#/texts/55", + "en", + 0.79 ], - "sref": "#/page-elements/108", - "text-order": 108, - "type": "figure" - }, - { - "bbox": [ - 46.00423812866211, - 491.7976379394531, - 543.2025756835938, - 523.7771606445312 + [ + "semantic", + 2617775076168299948, + "TEXT", + "#/texts/55", + "header", + 0.98 ], - "iref": "#/figures/2/captions/0", - "name": "caption", - "orig-order": 112, - "page": 8, - "span": [ - 0, - 268 + [ + "language", + 13974986056043304735, + "TEXT", + "#/texts/56", + "en", + 0.93 ], - "sref": "#/page-elements/109", - "text-order": 109, - "type": "caption" - }, - { - "bbox": [ - 46.486663818359375, - 370.0644836425781, - 551.9771728515625, - 457.6360168457031 + [ + "semantic", + 13974986056043304735, + "TEXT", + "#/texts/56", + "text", + 1.0 ], - "iref": "#/texts/89", - "name": "text", - "orig-order": 106, - "page": 8, - "span": [ - 0, - 745 + [ + "language", + 5985285694705576020, + "TEXT", + "#/texts/57", + "en", + 0.83 ], - "sref": "#/page-elements/110", - "text-order": 110, - "type": "paragraph" - }, - { - "bbox": [ - 46.486663818359375, - 239.97216796875, - 551.4871215820312, - 366.491455078125 + [ + "semantic", + 5985285694705576020, + "TEXT", + "#/texts/57", + "header", + 0.92 ], - "iref": "#/texts/90", - "name": "text", - "orig-order": 107, - "page": 8, - "span": [ - 0, - 1027 + [ + "language", + 11235296141350659290, + "TEXT", + "#/texts/58", + "en", + 0.96 ], - "sref": "#/page-elements/111", - "text-order": 111, - "type": "paragraph" - }, - { - "bbox": [ - 45.14011764526367, - 200.4981231689453, - 333.7398986816406, - 211.88601684570312 + [ + "semantic", + 11235296141350659290, + "TEXT", + "#/texts/58", + "text", + 1.0 ], - "iref": "#/texts/91", - "name": "subtitle-level-1", - "orig-order": 108, - "page": 8, - "span": [ - 0, - 48 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/59", + "en", + 0.78 ], - "sref": "#/page-elements/112", - "text-order": 112, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 45.9116325378418, - 162.0589599609375, - 551.3727416992188, - 184.45217895507812 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/59", + "text", + 0.52 ], - "iref": "#/texts/92", - "name": "text", - "orig-order": 109, - "page": 8, - "span": [ - 0, - 179 + [ + "language", + 4361549266576336732, + "TEXT", + "#/texts/60", + "eu", + 0.19 ], - "sref": "#/page-elements/113", - "text-order": 113, - "type": "paragraph" - }, - { - "bbox": [ - 46.21662902832031, - 84.04818725585938, - 550.9126586914062, - 158.48593139648438 + [ + "semantic", + 4361549266576336732, + "TEXT", + "#/texts/60", + "meta-data", + 0.81 ], - "iref": "#/texts/93", - "name": "text", - "orig-order": 110, - "page": 8, - "span": [ - 0, - 643 + [ + "language", + 5771309285006424458, + "TEXT", + "#/texts/61", + "en", + 0.93 ], - "sref": "#/page-elements/114", - "text-order": 114, - "type": "paragraph" - }, - { - "bbox": [ - 44.992271423339844, - 45.01641845703125, - 552.1865844726562, - 80.5264892578125 + [ + "semantic", + 5771309285006424458, + "TEXT", + "#/texts/61", + "text", + 0.99 ], - "iref": "#/texts/94", - "name": "text", - "orig-order": 111, - "page": 8, - "span": [ - 0, - 262 + [ + "language", + 5371685212527510397, + "TEXT", + "#/texts/62", + "en", + 0.77 ], - "sref": "#/page-elements/115", - "text-order": 115, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 5371685212527510397, + "TEXT", + "#/texts/62", + "header", + 1.0 ], - "iref": "#/texts/95", - "name": "text", - "orig-order": 115, - "page": 8, - "span": [ - 0, - 320 + [ + "language", + 7817257645383866853, + "TEXT", + "#/texts/63", + "en", + 0.91 ], - "sref": "#/page-elements/116", - "text-order": 116, - "type": "paragraph" - }, - { - "bbox": [ - 44.34560012817383, - 751.4635620117188, - 84.67137145996094, - 758.0504760742188 + [ + "semantic", + 7817257645383866853, + "TEXT", + "#/texts/63", + "text", + 1.0 ], - "iref": "#/page-headers/12", - "name": "page-header", - "orig-order": 126, - "page": 9, - "span": [ - 0, - 13 + [ + "language", + 2929626768872004841, + "TEXT", + "#/texts/64", + "en", + 0.81 ], - "sref": "#/page-elements/117", - "text-order": 117, - "type": "page-header" - }, - { - "bbox": [ - 528.5497436523438, - 751.4075317382812, - 550.62109375, - 758.0504760742188 + [ + "semantic", + 2929626768872004841, + "TEXT", + "#/texts/64", + "text", + 0.85 ], - "iref": "#/texts/96", - "name": "text", - "orig-order": 127, - "page": 9, - "span": [ - 0, - 5 + [ + "language", + 15879756297712818143, + "TEXT", + "#/texts/65", + "en", + 0.74 ], - "sref": "#/page-elements/118", - "text-order": 118, - "type": "paragraph" - }, - { - "bbox": [ - 116.26325988769531, - 507.8388977050781, - 473.644775390625, - 731.2719116210938 + [ + "semantic", + 15879756297712818143, + "TEXT", + "#/texts/65", + "text", + 0.97 ], - "iref": "#/figures/3", - "name": "picture", - "orig-order": 129, - "page": 9, - "span": [ - 0, - 0 + [ + "language", + 16116531546352845311, + "TEXT", + "#/texts/66", + "en", + 0.96 ], - "sref": "#/page-elements/119", - "text-order": 119, - "type": "figure" - }, - { - "bbox": [ - 44.78739929199219, - 447.43023681640625, - 541.6075439453125, - 491.6891174316406 + [ + "semantic", + 16116531546352845311, + "TEXT", + "#/texts/66", + "text", + 0.92 ], - "iref": "#/figures/3/captions/0", - "name": "caption", - "orig-order": 125, - "page": 9, - "span": [ - 0, - 473 + [ + "language", + 9541434157786316356, + "TEXT", + "#/texts/67", + "en", + 0.95 ], - "sref": "#/page-elements/120", - "text-order": 120, - "type": "caption" - }, - { - "bbox": [ - 44.418067932128906, - 395.521728515625, - 176.333251953125, - 406.9096374511719 + [ + "semantic", + 9541434157786316356, + "TEXT", + "#/texts/67", + "text", + 1.0 ], - "iref": "#/texts/97", - "name": "subtitle-level-1", - "orig-order": 117, - "page": 9, - "span": [ - 0, - 22 + [ + "language", + 997682002692959482, + "TEXT", + "#/texts/68", + "en", + 0.89 ], - "sref": "#/page-elements/121", - "text-order": 121, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 343.8106384277344, - 548.7684326171875, - 379.5713806152344 + [ + "semantic", + 997682002692959482, + "TEXT", + "#/texts/68", + "text", + 1.0 ], - "iref": "#/texts/98", - "name": "text", - "orig-order": 118, - "page": 9, - "span": [ - 0, - 270 + [ + "language", + 11590138063543342276, + "TEXT", + "#/texts/69", + "en", + 0.5 ], - "sref": "#/page-elements/122", - "text-order": 122, - "type": "paragraph" - }, - { - "bbox": [ - 245.61886596679688, - 303.5643005371094, - 549.354736328125, - 334.3446350097656 + [ + "semantic", + 11590138063543342276, + "TEXT", + "#/texts/69", + "header", + 0.98 ], - "iref": "#/texts/99", - "name": "formula", - "orig-order": 119, - "page": 9, - "span": [ - 0, - 72 + [ + "language", + 16380310806374538602, + "TEXT", + "#/texts/70", + "en", + 0.86 ], - "sref": "#/page-elements/123", - "text-order": 123, - "type": "equation" - }, - { - "bbox": [ - 44.27131652832031, - 266.0909118652344, - 323.5520935058594, - 275.5295104980469 + [ + "semantic", + 16380310806374538602, + "TEXT", + "#/texts/70", + "text", + 0.99 ], - "iref": "#/texts/100", - "name": "text", - "orig-order": 120, - "page": 9, - "span": [ - 0, - 69 + [ + "language", + 5393976293631695754, + "TEXT", + "#/texts/71", + "en", + 0.9 ], - "sref": "#/page-elements/124", - "text-order": 124, - "type": "paragraph" - }, - { - "bbox": [ - 44.087921142578125, - 226.52023315429688, - 183.25424194335938, - 237.9081268310547 + [ + "semantic", + 5393976293631695754, + "TEXT", + "#/texts/71", + "text", + 1.0 ], - "iref": "#/texts/101", - "name": "subtitle-level-1", - "orig-order": 121, - "page": 9, - "span": [ - 0, - 23 + [ + "language", + 1988335831916069382, + "TEXT", + "#/texts/72", + "en", + 0.93 ], - "sref": "#/page-elements/125", - "text-order": 125, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.12942886352539, - 149.07611083984375, - 549.1555786132812, - 210.865478515625 + [ + "semantic", + 1988335831916069382, + "TEXT", + "#/texts/72", + "text", + 1.0 ], - "iref": "#/texts/102", - "name": "text", - "orig-order": 122, - "page": 9, - "span": [ - 0, - 580 + [ + "language", + 5147764798816678886, + "TEXT", + "#/texts/73", + "en", + 0.87 ], - "sref": "#/page-elements/126", - "text-order": 126, - "type": "paragraph" - }, - { - "bbox": [ - 213.45111083984375, - 107.99786376953125, - 548.7833251953125, - 139.26446533203125 + [ + "semantic", + 5147764798816678886, + "TEXT", + "#/texts/73", + "text", + 0.99 ], - "iref": "#/texts/103", - "name": "formula", - "orig-order": 123, - "page": 9, - "span": [ - 0, - 147 + [ + "language", + 285583876932865368, + "TEXT", + "#/texts/74", + "en", + 0.97 ], - "sref": "#/page-elements/127", - "text-order": 127, - "type": "equation" - }, - { - "bbox": [ - 44.78630447387695, - 45.0455436706543, - 548.7993774414062, - 80.76483154296875 + [ + "semantic", + 285583876932865368, + "TEXT", + "#/texts/74", + "text", + 1.0 ], - "iref": "#/texts/104", - "name": "text", - "orig-order": 124, - "page": 9, - "span": [ - 0, - 307 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/75", + "en", + 0.78 ], - "sref": "#/page-elements/128", - "text-order": 128, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/75", + "text", + 0.52 ], - "iref": "#/texts/105", - "name": "text", - "orig-order": 128, - "page": 9, - "span": [ - 0, - 320 + [ + "language", + 4361549257370278754, + "TEXT", + "#/texts/76", + "zh", + 0.41 ], - "sref": "#/page-elements/129", - "text-order": 129, - "type": "paragraph" - }, - { - "bbox": [ - 45.890689849853516, - 743.98095703125, - 143.1890869140625, - 761.30615234375 + [ + "semantic", + 4361549257370278754, + "TEXT", + "#/texts/76", + "meta-data", + 0.77 ], - "iref": "#/figures/4", - "name": "picture", - "orig-order": 142, - "page": 10, - "span": [ - 0, - 0 + [ + "language", + 13183039880198077038, + "TEXT", + "#/texts/77", + "en", + 0.91 ], - "sref": "#/page-elements/130", - "text-order": 130, - "type": "figure" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.8926391601562, - 758.5383911132812 + [ + "semantic", + 13183039880198077038, + "TEXT", + "#/texts/77", + "text", + 1.0 ], - "iref": "#/page-headers/13", - "name": "page-header", - "orig-order": 143, - "page": 10, - "span": [ - 0, - 13 + [ + "language", + 13428900458866068249, + "TEXT", + "#/texts/78", + "en", + 0.9 ], - "sref": "#/page-elements/131", - "text-order": 131, - "type": "page-header" - }, - { - "bbox": [ - 44.981788635253906, - 720.4783935546875, - 201.29905700683594, - 731.9963989257812 + [ + "semantic", + 13428900458866068249, + "TEXT", + "#/texts/78", + "header", + 0.94 ], - "iref": "#/texts/106", - "name": "subtitle-level-1", - "orig-order": 130, - "page": 10, - "span": [ - 0, - 26 + [ + "language", + 1430911655724119030, + "TEXT", + "#/texts/79", + "en", + 0.92 + ], + [ + "semantic", + 1430911655724119030, + "TEXT", + "#/texts/79", + "text", + 1.0 ], - "sref": "#/page-elements/132", - "text-order": 132, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.0963020324707, - 656.0805053710938, - 554.1248779296875, - 705.2210693359375 + [ + "language", + 13770706479324480755, + "TEXT", + "#/texts/80", + "en", + 0.93 ], - "iref": "#/texts/107", - "name": "text", - "orig-order": 131, - "page": 10, - "span": [ - 0, - 390 + [ + "semantic", + 13770706479324480755, + "TEXT", + "#/texts/80", + "text", + 1.0 ], - "sref": "#/page-elements/133", - "text-order": 133, - "type": "paragraph" - }, - { - "bbox": [ - 45.49040985107422, - 616.5106201171875, - 214.94256591796875, - 627.93359375 + [ + "language", + 11165481757050847950, + "TEXT", + "#/texts/81", + "en", + 0.12 ], - "iref": "#/texts/108", - "name": "subtitle-level-1", - "orig-order": 132, - "page": 10, - "span": [ - 0, - 27 + [ + "semantic", + 11165481757050847950, + "TEXT", + "#/texts/81", + "text", + 1.0 ], - "sref": "#/page-elements/134", - "text-order": 134, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 45.356536865234375, - 578.0712890625, - 552.450927734375, - 600.5599365234375 + [ + "language", + 9572077971492738329, + "TEXT", + "#/texts/82", + "en", + 0.93 ], - "iref": "#/texts/109", - "name": "text", - "orig-order": 133, - "page": 10, - "span": [ - 0, - 172 + [ + "semantic", + 9572077971492738329, + "TEXT", + "#/texts/82", + "text", + 0.99 ], - "sref": "#/page-elements/135", - "text-order": 135, - "type": "paragraph" - }, - { - "bbox": [ - 46.00928497314453, - 500.0617370605469, - 551.898193359375, - 574.4982299804688 + [ + "language", + 14951391138799557075, + "TEXT", + "#/texts/83", + "pl", + 0.13 ], - "iref": "#/texts/110", - "name": "text", - "orig-order": 134, - "page": 10, - "span": [ - 0, - 691 + [ + "semantic", + 14951391138799557075, + "TEXT", + "#/texts/83", + "text", + 1.0 ], - "sref": "#/page-elements/136", - "text-order": 136, - "type": "paragraph" - }, - { - "bbox": [ - 45.801177978515625, - 448.0732421875, - 552.126953125, - 496.556396484375 + [ + "language", + 16602156009514813718, + "TEXT", + "#/texts/84", + "en", + 0.95 ], - "iref": "#/texts/111", - "name": "text", - "orig-order": 135, - "page": 10, - "span": [ - 0, - 420 + [ + "semantic", + 16602156009514813718, + "TEXT", + "#/texts/84", + "text", + 1.0 ], - "sref": "#/page-elements/137", - "text-order": 137, - "type": "paragraph" - }, - { - "bbox": [ - 46.02473449707031, - 408.5044250488281, - 321.5076904296875, - 419.892333984375 + [ + "language", + 7162849562576593449, + "TEXT", + "#/texts/85", + "en", + 0.95 ], - "iref": "#/texts/112", - "name": "subtitle-level-1", - "orig-order": 136, - "page": 10, - "span": [ - 0, - 31 + [ + "semantic", + 7162849562576593449, + "TEXT", + "#/texts/85", + "text", + 0.98 ], - "sref": "#/page-elements/138", - "text-order": 138, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.301429748535156, - 357.0820007324219, - 550.6118774414062, - 392.4583435058594 + [ + "language", + 15385417954505503552, + "TEXT", + "#/texts/86", + "en", + 0.83 ], - "iref": "#/texts/113", - "name": "text", - "orig-order": 137, - "page": 10, - "span": [ - 0, - 334 + [ + "semantic", + 15385417954505503552, + "TEXT", + "#/texts/86", + "header", + 0.93 ], - "sref": "#/page-elements/139", - "text-order": 139, - "type": "paragraph" - }, - { - "bbox": [ - 46.488189697265625, - 253.0490264892578, - 551.0360107421875, - 353.4529724121094 + [ + "language", + 10815650641518265876, + "TEXT", + "#/texts/87", + "en", + 0.94 ], - "iref": "#/texts/114", - "name": "text", - "orig-order": 138, - "page": 10, - "span": [ - 0, - 847 + [ + "semantic", + 10815650641518265876, + "TEXT", + "#/texts/87", + "text", + 1.0 ], - "sref": "#/page-elements/140", - "text-order": 140, - "type": "paragraph" - }, - { - "bbox": [ - 46.440311431884766, - 188.080810546875, - 551.396484375, - 249.4759979248047 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/88", + "en", + 0.78 ], - "iref": "#/texts/115", - "name": "text", - "orig-order": 139, - "page": 10, - "span": [ - 0, - 477 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/88", + "text", + 0.52 ], - "sref": "#/page-elements/141", - "text-order": 141, - "type": "paragraph" - }, - { - "bbox": [ - 46.27632141113281, - 136.03631591796875, - 550.9563598632812, - 184.4517822265625 + [ + "language", + 12004249365408683930, + "TEXT", + "#/texts/89", + "en", + 0.92 ], - "iref": "#/texts/116", - "name": "text", - "orig-order": 140, - "page": 10, - "span": [ - 0, - 404 + [ + "semantic", + 12004249365408683930, + "TEXT", + "#/texts/89", + "text", + 1.0 ], - "sref": "#/page-elements/142", - "text-order": 142, - "type": "paragraph" - }, - { - "bbox": [ - 46.42215347290039, - 58.08152389526367, - 551.0359497070312, - 132.46327209472656 + [ + "language", + 7223381657047466215, + "TEXT", + "#/texts/90", + "en", + 0.92 ], - "iref": "#/texts/117", - "name": "text", - "orig-order": 141, - "page": 10, - "span": [ - 0, - 572 + [ + "semantic", + 7223381657047466215, + "TEXT", + "#/texts/90", + "text", + 1.0 ], - "sref": "#/page-elements/143", - "text-order": 143, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 15132906055887224772, + "TEXT", + "#/texts/91", + "en", + 0.82 ], - "iref": "#/texts/118", - "name": "text", - "orig-order": 144, - "page": 10, - "span": [ - 0, - 320 + [ + "semantic", + 15132906055887224772, + "TEXT", + "#/texts/91", + "header", + 0.99 ], - "sref": "#/page-elements/144", - "text-order": 144, - "type": "paragraph" - }, - { - "bbox": [ - 43.98883056640625, - 751.4635620117188, - 84.67137145996094, - 758.0504760742188 + [ + "language", + 17129434987283608290, + "TEXT", + "#/texts/92", + "en", + 0.94 ], - "iref": "#/page-headers/14", - "name": "page-header", - "orig-order": 150, - "page": 11, - "span": [ - 0, - 13 + [ + "semantic", + 17129434987283608290, + "TEXT", + "#/texts/92", + "text", + 1.0 ], - "sref": "#/page-elements/145", - "text-order": 145, - "type": "page-header" - }, - { - "bbox": [ - 525.1477661132812, - 751.4075317382812, - 548.775146484375, - 758.0504760742188 + [ + "language", + 10350406469077463155, + "TEXT", + "#/texts/93", + "en", + 0.92 ], - "iref": "#/texts/119", - "name": "text", - "orig-order": 151, - "page": 11, - "span": [ - 0, - 6 + [ + "semantic", + 10350406469077463155, + "TEXT", + "#/texts/93", + "text", + 1.0 ], - "sref": "#/page-elements/146", - "text-order": 146, - "type": "paragraph" - }, - { - "bbox": [ - 48.36570739746094, - 477.8360900878906, - 548.3624267578125, - 732.3331298828125 + [ + "language", + 16949854269270315165, + "TEXT", + "#/texts/94", + "en", + 0.91 ], - "iref": "#/figures/5", - "name": "picture", - "orig-order": 153, - "page": 11, - "span": [ - 0, - 0 + [ + "semantic", + 16949854269270315165, + "TEXT", + "#/texts/94", + "text", + 1.0 ], - "sref": "#/page-elements/147", - "text-order": 147, - "type": "figure" - }, - { - "bbox": [ - 44.78739929199219, - 428.34173583984375, - 541.0477905273438, - 460.564697265625 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/95", + "en", + 0.78 ], - "iref": "#/figures/5/captions/0", - "name": "caption", - "orig-order": 149, - "page": 11, - "span": [ - 0, - 275 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/95", + "text", + 0.52 ], - "sref": "#/page-elements/148", - "text-order": 148, - "type": "caption" - }, - { - "bbox": [ - 44.78684997558594, - 331.06005859375, - 550.6510620117188, - 405.4977722167969 + [ + "language", + 4361549266593946746, + "TEXT", + "#/texts/96", + "fr", + 0.37 ], - "iref": "#/texts/120", - "name": "text", - "orig-order": 145, - "page": 11, - "span": [ - 0, - 596 + [ + "semantic", + 4361549266593946746, + "TEXT", + "#/texts/96", + "meta-data", + 0.75 ], - "sref": "#/page-elements/149", - "text-order": 149, - "type": "paragraph" - }, - { - "bbox": [ - 44.489322662353516, - 291.4902038574219, - 365.9893798828125, - 302.87811279296875 + [ + "language", + 9802652237802670052, + "TEXT", + "#/texts/97", + "zh", + 0.17 ], - "iref": "#/texts/121", - "name": "subtitle-level-1", - "orig-order": 146, - "page": 11, - "span": [ - 0, - 39 + [ + "semantic", + 9802652237802670052, + "TEXT", + "#/texts/97", + "header", + 0.96 ], - "sref": "#/page-elements/150", - "text-order": 150, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.785736083984375, - 175.04168701171875, - 549.7868041992188, - 275.5009460449219 + [ + "language", + 5524728206729419689, + "TEXT", + "#/texts/98", + "en", + 0.9 ], - "iref": "#/texts/122", - "name": "text", - "orig-order": 147, - "page": 11, - "span": [ - 0, - 861 + [ + "semantic", + 5524728206729419689, + "TEXT", + "#/texts/98", + "text", + 1.0 ], - "sref": "#/page-elements/151", - "text-order": 151, - "type": "paragraph" - }, - { - "bbox": [ - 44.785736083984375, - 45.043888092041016, - 549.4429931640625, - 171.5908203125 + [ + "language", + 4043385013945968936, + "TEXT", + "#/texts/99", + "sv", + 0.11 ], - "iref": "#/texts/123", - "name": "text", - "orig-order": 148, - "page": 11, - "span": [ - 0, - 1189 + [ + "semantic", + 4043385013945968936, + "TEXT", + "#/texts/99", + "text", + 0.99 ], - "sref": "#/page-elements/152", - "text-order": 152, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 11778884428660217326, + "TEXT", + "#/texts/100", + "en", + 0.83 ], - "iref": "#/texts/124", - "name": "text", - "orig-order": 152, - "page": 11, - "span": [ - 0, - 320 + [ + "semantic", + 11778884428660217326, + "TEXT", + "#/texts/100", + "text", + 1.0 ], - "sref": "#/page-elements/153", - "text-order": 153, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 751.4075317382812, - 51.251686096191406, - 758.0504760742188 + [ + "language", + 12875050310340408203, + "TEXT", + "#/texts/101", + "en", + 0.37 ], - "iref": "#/texts/125", - "name": "text", - "orig-order": 166, - "page": 12, - "span": [ - 0, - 2 + [ + "semantic", + 12875050310340408203, + "TEXT", + "#/texts/101", + "header", + 0.96 ], - "sref": "#/page-elements/154", - "text-order": 154, - "type": "paragraph" - }, - { - "bbox": [ - 56.12232208251953, - 751.4075317382812, - 70.11566162109375, - 758.0504760742188 + [ + "language", + 3785875504044487339, + "TEXT", + "#/texts/102", + "en", + 0.93 ], - "iref": "#/texts/126", - "name": "text", - "orig-order": 167, - "page": 12, - "span": [ - 0, - 5 + [ + "semantic", + 3785875504044487339, + "TEXT", + "#/texts/102", + "text", + 1.0 ], - "sref": "#/page-elements/155", - "text-order": 155, - "type": "paragraph" - }, - { - "bbox": [ - 510.634765625, - 751.4635620117188, - 550.7427368164062, - 758.252197265625 + [ + "language", + 12105626155924658285, + "TEXT", + "#/texts/103", + "ja", + 0.11 ], - "iref": "#/page-headers/15", - "name": "page-header", - "orig-order": 168, - "page": 12, - "span": [ - 0, - 13 + [ + "semantic", + 12105626155924658285, + "TEXT", + "#/texts/103", + "text", + 1.0 ], - "sref": "#/page-elements/156", - "text-order": 156, - "type": "page-header" - }, - { - "bbox": [ - 55.876461029052734, - 606.848876953125, - 541.853759765625, - 729.6771850585938 + [ + "language", + 16265612055607243129, + "TEXT", + "#/texts/104", + "en", + 0.91 ], - "iref": "#/figures/6", - "name": "picture", - "orig-order": 164, - "page": 12, - "span": [ - 0, - 0 + [ + "semantic", + 16265612055607243129, + "TEXT", + "#/texts/104", + "text", + 1.0 ], - "sref": "#/page-elements/157", - "text-order": 157, - "type": "figure" - }, - { - "bbox": [ - 44.766658782958984, - 585.4602661132812, - 387.12310791015625, - 593.5936279296875 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/105", + "en", + 0.78 ], - "iref": "#/figures/6/captions/0", - "name": "caption", - "orig-order": 165, - "page": 12, - "span": [ - 0, - 88 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/105", + "text", + 0.52 ], - "sref": "#/page-elements/158", - "text-order": 158, - "type": "caption" - }, - { - "bbox": [ - 45.36357116699219, - 526.083984375, - 552.5618286132812, - 548.4772338867188 + [ + "language", + 10252446451495472512, + "TEXT", + "#/texts/106", + "en", + 0.82 ], - "iref": "#/texts/127", - "name": "text", - "orig-order": 154, - "page": 12, - "span": [ - 0, - 171 + [ + "semantic", + 10252446451495472512, + "TEXT", + "#/texts/106", + "header", + 0.85 ], - "sref": "#/page-elements/159", - "text-order": 159, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 448.0732421875, - 552.16748046875, - 522.4549560546875 + [ + "language", + 17011944206067158637, + "TEXT", + "#/texts/107", + "en", + 0.92 ], - "iref": "#/texts/128", - "name": "text", - "orig-order": 155, - "page": 12, - "span": [ - 0, - 596 + [ + "semantic", + 17011944206067158637, + "TEXT", + "#/texts/107", + "text", + 1.0 ], - "sref": "#/page-elements/160", - "text-order": 160, - "type": "paragraph" - }, - { - "bbox": [ - 46.228458404541016, - 382.8196716308594, - 552.1286010742188, - 444.5987854003906 + [ + "language", + 16289627123982758705, + "TEXT", + "#/texts/108", + "en", + 0.51 ], - "iref": "#/texts/129", - "name": "text", - "orig-order": 156, - "page": 12, - "span": [ - 0, - 460 + [ + "semantic", + 16289627123982758705, + "TEXT", + "#/texts/108", + "header", + 0.97 ], - "sref": "#/page-elements/161", - "text-order": 161, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 357.0803527832031, - 309.6529846191406, - 366.4904479980469 + [ + "language", + 13969801897340997317, + "TEXT", + "#/texts/109", + "en", + 0.97 ], - "iref": "#/texts/130", - "name": "list-item", - "orig-order": 157, - "page": 12, - "span": [ - 0, - 57 + [ + "semantic", + 13969801897340997317, + "TEXT", + "#/texts/109", + "text", + 1.0 ], - "sref": "#/page-elements/162", - "text-order": 162, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 344.0412292480469, - 336.8304748535156, - 353.6436767578125 + [ + "language", + 105697770555684555, + "TEXT", + "#/texts/110", + "en", + 0.94 ], - "iref": "#/texts/131", - "name": "list-item", - "orig-order": 158, - "page": 12, - "span": [ - 0, - 65 + [ + "semantic", + 105697770555684555, + "TEXT", + "#/texts/110", + "text", + 1.0 ], - "sref": "#/page-elements/163", - "text-order": 163, - "type": "paragraph" - }, - { - "bbox": [ - 45.47064971923828, - 331.05810546875, - 478.3088684082031, - 340.54962158203125 + [ + "language", + 15938840672015995359, + "TEXT", + "#/texts/111", + "en", + 0.97 ], - "iref": "#/texts/132", - "name": "list-item", - "orig-order": 159, - "page": 12, - "span": [ - 0, - 101 + [ + "semantic", + 15938840672015995359, + "TEXT", + "#/texts/111", + "text", + 1.0 ], - "sref": "#/page-elements/164", - "text-order": 164, - "type": "paragraph" - }, - { - "bbox": [ - 46.16604232788086, - 214.04542541503906, - 551.7832641601562, - 314.4459533691406 + [ + "language", + 16505790528099785698, + "TEXT", + "#/texts/112", + "en", + 0.36 ], - "iref": "#/texts/133", - "name": "text", - "orig-order": 160, - "page": 12, - "span": [ - 0, - 923 + [ + "semantic", + 16505790528099785698, + "TEXT", + "#/texts/112", + "header", + 1.0 ], - "sref": "#/page-elements/165", - "text-order": 165, - "type": "paragraph" - }, - { - "bbox": [ - 46.26358413696289, - 149.0762481689453, - 551.3743896484375, - 210.68536376953125 + [ + "language", + 14738723905055920039, + "TEXT", + "#/texts/113", + "en", + 0.87 ], - "iref": "#/texts/134", - "name": "text", - "orig-order": 161, - "page": 12, - "span": [ - 0, - 569 + [ + "semantic", + 14738723905055920039, + "TEXT", + "#/texts/113", + "text", + 0.99 ], - "sref": "#/page-elements/166", - "text-order": 166, - "type": "paragraph" - }, - { - "bbox": [ - 45.70681381225586, - 71.06546783447266, - 551.875732421875, - 145.5064697265625 + [ + "language", + 5699550326698755904, + "TEXT", + "#/texts/114", + "en", + 0.89 ], - "iref": "#/texts/135", - "name": "text", - "orig-order": 162, - "page": 12, - "span": [ - 0, - 698 + [ + "semantic", + 5699550326698755904, + "TEXT", + "#/texts/114", + "text", + 1.0 ], - "sref": "#/page-elements/167", - "text-order": 167, - "type": "paragraph" - }, - { - "bbox": [ - 46.488380432128906, - 45.0432014465332, - 551.8381958007812, - 67.6728515625 + [ + "language", + 11609131422778723150, + "TEXT", + "#/texts/115", + "en", + 0.91 ], - "iref": "#/texts/136", - "name": "text", - "orig-order": 163, - "page": 12, - "span": [ - 0, - 218 + [ + "semantic", + 11609131422778723150, + "TEXT", + "#/texts/115", + "text", + 1.0 ], - "sref": "#/page-elements/168", - "text-order": 168, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 788128893109726279, + "TEXT", + "#/texts/116", + "en", + 0.97 ], - "iref": "#/texts/137", - "name": "text", - "orig-order": 169, - "page": 12, - "span": [ - 0, - 320 + [ + "semantic", + 788128893109726279, + "TEXT", + "#/texts/116", + "text", + 1.0 ], - "sref": "#/page-elements/169", - "text-order": 169, - "type": "paragraph" - }, - { - "bbox": [ - 44.31840515136719, - 751.4635620117188, - 84.67137145996094, - 758.0541381835938 + [ + "language", + 7029344862946908483, + "TEXT", + "#/texts/117", + "en", + 0.92 ], - "iref": "#/page-headers/16", - "name": "page-header", - "orig-order": 177, - "page": 13, - "span": [ - 0, - 13 + [ + "semantic", + 7029344862946908483, + "TEXT", + "#/texts/117", + "text", + 1.0 ], - "sref": "#/page-elements/170", - "text-order": 170, - "type": "page-header" - }, - { - "bbox": [ - 525.1477661132812, - 751.4075317382812, - 529.9112548828125, - 758.0504760742188 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/118", + "en", + 0.78 ], - "iref": "#/texts/138", - "name": "text", - "orig-order": 178, - "page": 13, - "span": [ - 0, - 2 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/118", + "text", + 0.52 ], - "sref": "#/page-elements/171", - "text-order": 171, - "type": "paragraph" - }, - { - "bbox": [ - 534.7818603515625, - 751.4075317382812, - 548.775146484375, - 758.0504760742188 + [ + "language", + 2144926686518491811, + "TEXT", + "#/texts/119", + "fr", + 0.21 ], - "iref": "#/texts/139", - "name": "text", - "orig-order": 179, - "page": 13, - "span": [ - 0, - 5 + [ + "semantic", + 2144926686518491811, + "TEXT", + "#/texts/119", + "meta-data", + 0.56 ], - "sref": "#/page-elements/172", - "text-order": 172, - "type": "paragraph" - }, - { - "bbox": [ - 45.15538024902344, - 607.3761596679688, - 548.95361328125, - 731.4898681640625 + [ + "language", + 18333396269095847693, + "TEXT", + "#/texts/120", + "en", + 0.95 ], - "iref": "#/figures/7", - "name": "picture", - "orig-order": 181, - "page": 13, - "span": [ - 0, - 0 + [ + "semantic", + 18333396269095847693, + "TEXT", + "#/texts/120", + "text", + 1.0 ], - "sref": "#/page-elements/173", - "text-order": 173, - "type": "figure" - }, - { - "bbox": [ - 44.35472869873047, - 537.0355224609375, - 539.2632446289062, - 593.7362670898438 + [ + "language", + 4030998538427149966, + "TEXT", + "#/texts/121", + "en", + 0.51 ], - "iref": "#/figures/7/captions/0", - "name": "text", - "orig-order": 174, - "page": 13, - "span": [ - 0, - 608 + [ + "semantic", + 4030998538427149966, + "TEXT", + "#/texts/121", + "header", + 0.96 ], - "sref": "#/page-elements/174", - "text-order": 174, - "type": "paragraph" - }, - { - "bbox": [ - 44.49153518676758, - 441.90771484375, - 181.1155242919922, - 498.2774658203125 + [ + "language", + 10295608624766759271, + "TEXT", + "#/texts/122", + "en", + 0.94 ], - "iref": "#/tables/0/captions/0", - "name": "caption", - "orig-order": 175, - "page": 13, - "span": [ - 0, - 160 + [ + "semantic", + 10295608624766759271, + "TEXT", + "#/texts/122", + "text", + 0.99 ], - "sref": "#/page-elements/175", - "text-order": 175, - "type": "caption" - }, - { - "bbox": [ - 210.0027313232422, - 346.577880859375, - 549.0220336914062, - 499.1263427734375 + [ + "language", + 10633780781731536747, + "TEXT", + "#/texts/123", + "en", + 0.95 ], - "iref": "#/tables/0", - "name": "table", - "orig-order": 176, - "page": 13, - "span": [ - 0, - 0 + [ + "semantic", + 10633780781731536747, + "TEXT", + "#/texts/123", + "text", + 1.0 ], - "sref": "#/page-elements/176", - "text-order": 176, - "type": "table" - }, - { - "bbox": [ - 44.78739929199219, - 292.05572509765625, - 549.0201416015625, - 314.4489440917969 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/124", + "en", + 0.78 ], - "iref": "#/texts/140", - "name": "text", - "orig-order": 170, - "page": 13, - "span": [ - 0, - 191 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/124", + "text", + 0.52 ], - "sref": "#/page-elements/177", - "text-order": 177, - "type": "paragraph" - }, - { - "bbox": [ - 44.786376953125, - 188.07875061035156, - 550.8748779296875, - 288.5342712402344 + [ + "language", + 1080447728722590413, + "TEXT", + "#/texts/125", + "en", + 0.13 ], - "iref": "#/texts/141", - "name": "text", - "orig-order": 171, - "page": 13, - "span": [ - 0, - 834 + [ + "semantic", + 1080447728722590413, + "TEXT", + "#/texts/125", + "header", + 1.0 ], - "sref": "#/page-elements/178", - "text-order": 178, - "type": "paragraph" - }, - { - "bbox": [ - 44.73537826538086, - 148.51072692871094, - 178.22747802734375, - 159.89862060546875 + [ + "language", + 4361549257087816853, + "TEXT", + "#/texts/126", + "en", + 0.95 ], - "iref": "#/texts/142", - "name": "subtitle-level-1", - "orig-order": 172, - "page": 13, - "span": [ - 0, - 15 + [ + "semantic", + 4361549257087816853, + "TEXT", + "#/texts/126", + "meta-data", + 0.53 + ], + [ + "language", + 10195664788154887804, + "TEXT", + "#/texts/127", + "en", + 0.98 ], - "sref": "#/page-elements/179", - "text-order": 179, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 44.78739929199219, - 58.0830192565918, - 549.515625, - 132.5465087890625 + [ + "semantic", + 10195664788154887804, + "TEXT", + "#/texts/127", + "text", + 1.0 ], - "iref": "#/texts/143", - "name": "text", - "orig-order": 173, - "page": 13, - "span": [ - 0, - 699 + [ + "language", + 7538054744015619336, + "TEXT", + "#/texts/128", + "en", + 0.92 ], - "sref": "#/page-elements/180", - "text-order": 180, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 7538054744015619336, + "TEXT", + "#/texts/128", + "text", + 0.99 ], - "iref": "#/texts/144", - "name": "text", - "orig-order": 180, - "page": 13, - "span": [ - 0, - 320 + [ + "language", + 12426662601736619109, + "TEXT", + "#/texts/129", + "en", + 0.95 ], - "sref": "#/page-elements/181", - "text-order": 181, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 751.4075317382812, - 70.11566162109375, - 758.0504760742188 + [ + "semantic", + 12426662601736619109, + "TEXT", + "#/texts/129", + "text", + 1.0 ], - "iref": "#/texts/145", - "name": "text", - "orig-order": 213, - "page": 14, - "span": [ - 0, - 6 + [ + "language", + 4162783521620221579, + "TEXT", + "#/texts/130", + "en", + 0.86 ], - "sref": "#/page-elements/182", - "text-order": 182, - "type": "paragraph" - }, - { - "bbox": [ - 510.634765625, - 751.3934326171875, - 551.0859985351562, - 759.209228515625 + [ + "semantic", + 4162783521620221579, + "TEXT", + "#/texts/130", + "text", + 0.78 ], - "iref": "#/page-headers/17", - "name": "page-header", - "orig-order": 214, - "page": 14, - "span": [ - 0, - 13 + [ + "language", + 5135259059216244866, + "TEXT", + "#/texts/131", + "en", + 0.93 ], - "sref": "#/page-elements/183", - "text-order": 183, - "type": "page-header" - }, - { - "bbox": [ - 46.38566589355469, - 708.0682373046875, - 552.190673828125, - 731.0924072265625 + [ + "semantic", + 5135259059216244866, + "TEXT", + "#/texts/131", + "text", + 0.99 ], - "iref": "#/texts/146", - "name": "text", - "orig-order": 182, - "page": 14, - "span": [ - 0, - 119 + [ + "language", + 16998817296948099535, + "TEXT", + "#/texts/132", + "en", + 0.97 ], - "sref": "#/page-elements/184", - "text-order": 184, - "type": "paragraph" - }, - { - "bbox": [ - 45.289154052734375, - 669.0628051757812, - 553.278076171875, - 705.6804809570312 + [ + "semantic", + 16998817296948099535, + "TEXT", + "#/texts/132", + "text", + 0.97 ], - "iref": "#/texts/147", - "name": "text", - "orig-order": 183, - "page": 14, - "span": [ - 0, - 322 + [ + "language", + 1205649569241141618, + "TEXT", + "#/texts/133", + "en", + 0.94 ], - "sref": "#/page-elements/185", - "text-order": 185, - "type": "paragraph" - }, - { - "bbox": [ - 44.96582794189453, - 643.04052734375, - 553.867431640625, - 666.6377563476562 + [ + "semantic", + 1205649569241141618, + "TEXT", + "#/texts/133", + "text", + 1.0 ], - "iref": "#/texts/148", - "name": "text", - "orig-order": 184, - "page": 14, - "span": [ - 0, - 172 + [ + "language", + 12257840490666828590, + "TEXT", + "#/texts/134", + "en", + 0.92 ], - "sref": "#/page-elements/186", - "text-order": 186, - "type": "paragraph" - }, - { - "bbox": [ - 46.48820114135742, - 616.512939453125, - 242.9811553955078, - 628.0685424804688 + [ + "semantic", + 12257840490666828590, + "TEXT", + "#/texts/134", + "text", + 1.0 ], - "iref": "#/texts/149", - "name": "subtitle-level-1", - "orig-order": 185, - "page": 14, - "span": [ - 0, - 27 + [ + "language", + 7040847965650746591, + "TEXT", + "#/texts/135", + "en", + 0.87 ], - "sref": "#/page-elements/187", - "text-order": 187, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 603.7968139648438, - 209.16476440429688, - 615.1295166015625 + [ + "semantic", + 7040847965650746591, + "TEXT", + "#/texts/135", + "text", + 0.98 ], - "iref": "#/texts/150", - "name": "text", - "orig-order": 186, - "page": 14, - "span": [ - 0, - 41 + [ + "language", + 7927601225025519287, + "TEXT", + "#/texts/136", + "en", + 0.88 ], - "sref": "#/page-elements/188", - "text-order": 188, - "type": "paragraph" - }, - { - "bbox": [ - 45.64805603027344, - 577.8392333984375, - 84.40357971191406, - 589.0214233398438 + [ + "semantic", + 7927601225025519287, + "TEXT", + "#/texts/136", + "text", + 1.0 ], - "iref": "#/texts/151", - "name": "subtitle-level-1", - "orig-order": 187, - "page": 14, - "span": [ - 0, - 5 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/137", + "en", + 0.78 ], - "sref": "#/page-elements/189", - "text-order": 189, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 45.716941833496094, - 539.067138671875, - 288.83966064453125, - 575.9967041015625 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/137", + "text", + 0.52 ], - "iref": "#/texts/152", - "name": "text", - "orig-order": 188, - "page": 14, - "span": [ - 0, - 160 + [ + "language", + 1080447728722590402, + "TEXT", + "#/texts/138", + "ja", + 0.13 ], - "sref": "#/page-elements/190", - "text-order": 190, - "type": "paragraph" - }, - { - "bbox": [ - 45.982421875, - 512.6180419921875, - 110.57768249511719, - 524.0657958984375 + [ + "semantic", + 1080447728722590402, + "TEXT", + "#/texts/138", + "text", + 1.0 ], - "iref": "#/texts/153", - "name": "subtitle-level-1", - "orig-order": 189, - "page": 14, - "span": [ - 0, - 8 + [ + "language", + 4361549257087816853, + "TEXT", + "#/texts/139", + "en", + 0.95 ], - "sref": "#/page-elements/191", - "text-order": 191, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 46.48820114135742, - 498.1862487792969, - 411.1214904785156, - 507.86468505859375 + [ + "semantic", + 4361549257087816853, + "TEXT", + "#/texts/139", + "meta-data", + 0.53 ], - "iref": "#/texts/154", - "name": "list-item", - "orig-order": 190, - "page": 14, - "span": [ - 0, - 99 + [ + "language", + 8207961846673301043, + "TEXT", + "#/texts/140", + "en", + 0.9 ], - "sref": "#/page-elements/192", - "text-order": 192, - "type": "paragraph" - }, - { - "bbox": [ - 46.17177200317383, - 472.4082946777344, - 552.9000854492188, - 493.8719482421875 + [ + "semantic", + 8207961846673301043, + "TEXT", + "#/texts/140", + "text", + 1.0 ], - "iref": "#/texts/155", - "name": "list-item", - "orig-order": 191, - "page": 14, - "span": [ - 0, - 285 + [ + "language", + 11998199584890640594, + "TEXT", + "#/texts/141", + "en", + 0.96 ], - "sref": "#/page-elements/193", - "text-order": 193, - "type": "paragraph" - }, - { - "bbox": [ - 46.39039993286133, - 457.71929931640625, - 129.30548095703125, - 468.0890197753906 + [ + "semantic", + 11998199584890640594, + "TEXT", + "#/texts/141", + "text", + 1.0 ], - "iref": "#/texts/156", - "name": "list-item", - "orig-order": 192, - "page": 14, - "span": [ - 0, - 24 + [ + "language", + 16446129547721407877, + "TEXT", + "#/texts/142", + "en", + 0.69 ], - "sref": "#/page-elements/194", - "text-order": 194, - "type": "paragraph" - }, - { - "bbox": [ - 45.71389389038086, - 443.1494140625, - 242.0704345703125, - 453.0476989746094 + [ + "semantic", + 16446129547721407877, + "TEXT", + "#/texts/142", + "header", + 0.97 ], - "iref": "#/texts/157", - "name": "list-item", - "orig-order": 193, - "page": 14, - "span": [ - 0, - 53 + [ + "language", + 6720443978031524294, + "TEXT", + "#/texts/143", + "en", + 0.89 ], - "sref": "#/page-elements/195", - "text-order": 195, - "type": "paragraph" - }, - { - "bbox": [ - 46.020606994628906, - 417.41619873046875, - 554.6400756835938, - 438.90777587890625 + [ + "semantic", + 6720443978031524294, + "TEXT", + "#/texts/143", + "text", + 0.98 ], - "iref": "#/texts/158", - "name": "list-item", - "orig-order": 194, - "page": 14, - "span": [ - 0, - 248 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/144", + "en", + 0.78 ], - "sref": "#/page-elements/196", - "text-order": 196, - "type": "paragraph" - }, - { - "bbox": [ - 46.48814010620117, - 402.9024353027344, - 321.26422119140625, - 412.63861083984375 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/144", + "text", + 0.52 ], - "iref": "#/texts/159", - "name": "list-item", - "orig-order": 195, - "page": 14, - "span": [ - 0, - 70 + [ + "language", + 2144926730621142072, + "TEXT", + "#/texts/145", + "pms", + 0.76 ], - "sref": "#/page-elements/197", - "text-order": 197, - "type": "paragraph" - }, - { - "bbox": [ - 46.00100326538086, - 376.937744140625, - 554.378662109375, - 398.0555114746094 + [ + "semantic", + 2144926730621142072, + "TEXT", + "#/texts/145", + "reference", + 0.62 ], - "iref": "#/texts/160", - "name": "list-item", - "orig-order": 196, - "page": 14, - "span": [ - 0, - 211 + [ + "language", + 14222671032550229818, + "TEXT", + "#/texts/146", + "en", + 0.88 ], - "sref": "#/page-elements/198", - "text-order": 198, - "type": "paragraph" - }, - { - "bbox": [ - 46.0579719543457, - 350.9154052734375, - 553.2630004882812, - 372.03350830078125 + [ + "semantic", + 14222671032550229818, + "TEXT", + "#/texts/146", + "text", + 0.97 ], - "iref": "#/texts/161", - "name": "list-item", - "orig-order": 197, - "page": 14, - "span": [ - 0, - 156 + [ + "language", + 17486770941839589126, + "TEXT", + "#/texts/147", + "en", + 0.98 ], - "sref": "#/page-elements/199", - "text-order": 199, - "type": "paragraph" - }, - { - "bbox": [ - 45.94832229614258, - 335.78765869140625, - 129.86572265625, - 346.3191833496094 + [ + "semantic", + 17486770941839589126, + "TEXT", + "#/texts/147", + "text", + 1.0 ], - "iref": "#/texts/162", - "name": "list-item", - "orig-order": 198, - "page": 14, - "span": [ - 0, - 25 + [ + "language", + 16574813224778118841, + "TEXT", + "#/texts/148", + "en", + 0.91 ], - "sref": "#/page-elements/200", - "text-order": 200, - "type": "paragraph" - }, - { - "bbox": [ - 45.82542419433594, - 321.9457092285156, - 234.11181640625, - 331.8630065917969 + [ + "semantic", + 16574813224778118841, + "TEXT", + "#/texts/148", + "text", + 0.99 ], - "iref": "#/texts/163", - "name": "list-item", - "orig-order": 199, - "page": 14, - "span": [ - 0, - 54 + [ + "language", + 3356142343274371864, + "TEXT", + "#/texts/149", + "en", + 0.19 ], - "sref": "#/page-elements/201", - "text-order": 201, - "type": "paragraph" - }, - { - "bbox": [ - 46.478782653808594, - 307.19293212890625, - 269.6688537597656, - 316.9698486328125 + [ + "semantic", + 3356142343274371864, + "TEXT", + "#/texts/149", + "header", + 1.0 ], - "iref": "#/texts/164", - "name": "list-item", - "orig-order": 200, - "page": 14, - "span": [ - 0, - 61 + [ + "language", + 4778022085288441371, + "TEXT", + "#/texts/150", + "en", + 0.94 + ], + [ + "semantic", + 4778022085288441371, + "TEXT", + "#/texts/150", + "text", + 0.96 + ], + [ + "language", + 4361549257598904601, + "TEXT", + "#/texts/151", + "it", + 0.35 ], - "sref": "#/page-elements/202", - "text-order": 202, - "type": "paragraph" - }, - { - "bbox": [ - 46.01924514770508, - 292.9189147949219, - 301.0096130371094, - 302.8531799316406 + [ + "semantic", + 4361549257598904601, + "TEXT", + "#/texts/151", + "header", + 0.72 ], - "iref": "#/texts/165", - "name": "list-item", - "orig-order": 201, - "page": 14, - "span": [ - 0, - 75 + [ + "language", + 3523281823889115814, + "TEXT", + "#/texts/152", + "en", + 0.29 ], - "sref": "#/page-elements/203", - "text-order": 203, - "type": "paragraph" - }, - { - "bbox": [ - 46.444217681884766, - 278.1666564941406, - 187.92904663085938, - 288.1064453125 + [ + "semantic", + 3523281823889115814, + "TEXT", + "#/texts/152", + "meta-data", + 0.58 ], - "iref": "#/texts/166", - "name": "list-item", - "orig-order": 202, - "page": 14, - "span": [ - 0, - 43 + [ + "language", + 8500729849894221215, + "TEXT", + "#/texts/153", + "en", + 0.3 ], - "sref": "#/page-elements/204", - "text-order": 204, - "type": "paragraph" - }, - { - "bbox": [ - 46.00947952270508, - 263.8026123046875, - 169.3743896484375, - 274.1329345703125 + [ + "semantic", + 8500729849894221215, + "TEXT", + "#/texts/153", + "header", + 1.0 ], - "iref": "#/texts/167", - "name": "list-item", - "orig-order": 203, - "page": 14, - "span": [ - 0, - 36 + [ + "language", + 7813503946963688644, + "TEXT", + "#/texts/154", + "en", + 0.48 ], - "sref": "#/page-elements/205", - "text-order": 205, - "type": "paragraph" - }, - { - "bbox": [ - 46.049869537353516, - 231.931396484375, - 123.2709732055664, - 244.548095703125 + [ + "semantic", + 7813503946963688644, + "TEXT", + "#/texts/154", + "text", + 0.97 ], - "iref": "#/texts/168", - "name": "subtitle-level-1", - "orig-order": 204, - "page": 14, - "span": [ - 0, - 10 + [ + "language", + 9230987401345399746, + "TEXT", + "#/texts/155", + "en", + 0.97 ], - "sref": "#/page-elements/206", - "text-order": 206, - "type": "subtitle-level-1" - }, - { - "bbox": [ - 50.6671142578125, - 207.4257049560547, - 552.3800659179688, - 228.917724609375 + [ + "semantic", + 9230987401345399746, + "TEXT", + "#/texts/155", + "text", + 1.0 ], - "iref": "#/texts/169", - "name": "list-item", - "orig-order": 205, - "page": 14, - "span": [ - 0, - 179 + [ + "language", + 1997735398126013155, + "TEXT", + "#/texts/156", + "en", + 0.65 ], - "sref": "#/page-elements/207", - "text-order": 207, - "type": "paragraph" - }, - { - "bbox": [ - 50.74010467529297, - 184.40769958496094, - 552.61669921875, - 205.76568603515625 + [ + "semantic", + 1997735398126013155, + "TEXT", + "#/texts/156", + "text", + 0.97 ], - "iref": "#/texts/170", - "name": "list-item", - "orig-order": 206, - "page": 14, - "span": [ - 0, - 163 + [ + "language", + 13566764974477978642, + "TEXT", + "#/texts/157", + "en", + 0.74 ], - "sref": "#/page-elements/208", - "text-order": 208, - "type": "paragraph" - }, - { - "bbox": [ - 50.74015808105469, - 161.3896942138672, - 552.6810302734375, - 182.65234375 + [ + "semantic", + 13566764974477978642, + "TEXT", + "#/texts/157", + "text", + 1.0 ], - "iref": "#/texts/171", - "name": "list-item", - "orig-order": 207, - "page": 14, - "span": [ - 0, - 168 + [ + "language", + 4925537010788978399, + "TEXT", + "#/texts/158", + "en", + 0.89 ], - "sref": "#/page-elements/209", - "text-order": 209, - "type": "paragraph" - }, - { - "bbox": [ - 50.16819763183594, - 126.91963195800781, - 552.5728759765625, - 159.62261962890625 + [ + "semantic", + 4925537010788978399, + "TEXT", + "#/texts/158", + "text", + 1.0 ], - "iref": "#/texts/172", - "name": "list-item", - "orig-order": 208, - "page": 14, - "span": [ - 0, - 292 + [ + "language", + 16552665876195410077, + "TEXT", + "#/texts/159", + "en", + 0.32 ], - "sref": "#/page-elements/210", - "text-order": 210, - "type": "paragraph" - }, - { - "bbox": [ - 50.49177551269531, - 103.90162658691406, - 553.5820922851562, - 124.90191650390625 + [ + "semantic", + 16552665876195410077, + "TEXT", + "#/texts/159", + "text", + 0.8 ], - "iref": "#/texts/173", - "name": "list-item", - "orig-order": 209, - "page": 14, - "span": [ - 0, - 171 + [ + "language", + 17579390613842440572, + "TEXT", + "#/texts/160", + "en", + 0.71 ], - "sref": "#/page-elements/211", - "text-order": 211, - "type": "paragraph" - }, - { - "bbox": [ - 50.74018859863281, - 92.39262390136719, - 436.9924011230469, - 101.68670654296875 + [ + "semantic", + 17579390613842440572, + "TEXT", + "#/texts/160", + "text", + 0.99 ], - "iref": "#/texts/174", - "name": "list-item", - "orig-order": 210, - "page": 14, - "span": [ - 0, - 102 + [ + "language", + 722212543953276862, + "TEXT", + "#/texts/161", + "en", + 0.93 ], - "sref": "#/page-elements/212", - "text-order": 212, - "type": "paragraph" - }, - { - "bbox": [ - 50.74017333984375, - 69.43157196044922, - 552.4933471679688, - 90.58172607421875 + [ + "semantic", + 722212543953276862, + "TEXT", + "#/texts/161", + "text", + 1.0 ], - "iref": "#/texts/175", - "name": "list-item", - "orig-order": 211, - "page": 14, - "span": [ - 0, - 156 + [ + "language", + 11085577343317113173, + "TEXT", + "#/texts/162", + "en", + 0.69 ], - "sref": "#/page-elements/213", - "text-order": 213, - "type": "paragraph" - }, - { - "bbox": [ - 50.37576675415039, - 46.413570404052734, - 553.1749267578125, - 67.59844970703125 + [ + "semantic", + 11085577343317113173, + "TEXT", + "#/texts/162", + "reference", + 0.95 ], - "iref": "#/texts/176", - "name": "list-item", - "orig-order": 212, - "page": 14, - "span": [ - 0, - 184 + [ + "language", + 1792096630133661292, + "TEXT", + "#/texts/163", + "pl", + 0.19 ], - "sref": "#/page-elements/214", - "text-order": 214, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "semantic", + 1792096630133661292, + "TEXT", + "#/texts/163", + "reference", + 0.94 ], - "iref": "#/texts/177", - "name": "text", - "orig-order": 215, - "page": 14, - "span": [ - 0, - 320 + [ + "language", + 11462638369524745676, + "TEXT", + "#/texts/164", + "en", + 0.91 ], - "sref": "#/page-elements/215", - "text-order": 215, - "type": "paragraph" - }, - { - "bbox": [ - 44.473201751708984, - 751.4635620117188, - 84.89160919189453, - 758.80615234375 + [ + "semantic", + 11462638369524745676, + "TEXT", + "#/texts/164", + "text", + 1.0 ], - "iref": "#/page-headers/18", - "name": "page-header", - "orig-order": 228, - "page": 15, - "span": [ - 0, - 13 + [ + "language", + 16611805225457383637, + "TEXT", + "#/texts/165", + "en", + 0.82 ], - "sref": "#/page-elements/216", - "text-order": 216, - "type": "page-header" - }, - { - "bbox": [ - 454.5641784667969, - 745.4571533203125, - 549.099365234375, - 761.863037109375 + [ + "semantic", + 16611805225457383637, + "TEXT", + "#/texts/165", + "reference", + 0.97 ], - "iref": "#/figures/8", - "name": "picture", - "orig-order": 229, - "page": 15, - "span": [ - 0, - 0 + [ + "language", + 1531505125666754945, + "TEXT", + "#/texts/166", + "en", + 0.26 ], - "sref": "#/page-elements/217", - "text-order": 217, - "type": "figure" - }, - { - "bbox": [ - 46.63217544555664, - 722.4282836914062, - 362.7469787597656, - 731.7239990234375 + [ + "semantic", + 1531505125666754945, + "TEXT", + "#/texts/166", + "reference", + 0.89 ], - "iref": "#/texts/178", - "name": "list-item", - "orig-order": 216, - "page": 15, - "span": [ - 0, - 85 + [ + "language", + 15684389308320953629, + "TEXT", + "#/texts/167", + "en", + 0.59 ], - "sref": "#/page-elements/218", - "text-order": 218, - "type": "paragraph" - }, - { - "bbox": [ - 44.78684997558594, - 699.5198364257812, - 549.7481689453125, - 720.4119262695312 + [ + "semantic", + 15684389308320953629, + "TEXT", + "#/texts/167", + "reference", + 0.95 ], - "iref": "#/texts/179", - "name": "list-item", - "orig-order": 217, - "page": 15, - "span": [ - 0, - 168 + [ + "language", + 14590754343934702701, + "TEXT", + "#/texts/168", + "en", + 0.33 ], - "sref": "#/page-elements/219", - "text-order": 219, - "type": "paragraph" - }, - { - "bbox": [ - 44.7877197265625, - 688.0108642578125, - 238.66644287109375, - 697.144287109375 + [ + "semantic", + 14590754343934702701, + "TEXT", + "#/texts/168", + "reference", + 0.62 ], - "iref": "#/texts/180", - "name": "list-item", - "orig-order": 218, - "page": 15, - "span": [ - 0, - 50 + [ + "language", + 10480452763767134455, + "TEXT", + "#/texts/169", + "en", + 0.51 ], - "sref": "#/page-elements/220", - "text-order": 220, - "type": "paragraph" - }, - { - "bbox": [ - 44.54977798461914, - 676.5018920898438, - 243.0414581298828, - 685.6976318359375 + [ + "semantic", + 10480452763767134455, + "TEXT", + "#/texts/169", + "reference", + 0.96 ], - "iref": "#/texts/181", - "name": "list-item", - "orig-order": 219, - "page": 15, - "span": [ - 0, - 52 + [ + "language", + 11866471329779366855, + "TEXT", + "#/texts/170", + "en", + 0.49 ], - "sref": "#/page-elements/221", - "text-order": 221, - "type": "paragraph" - }, - { - "bbox": [ - 44.7877197265625, - 653.5408935546875, - 548.7638549804688, - 674.378662109375 + [ + "semantic", + 11866471329779366855, + "TEXT", + "#/texts/170", + "reference", + 0.9 ], - "iref": "#/texts/182", - "name": "list-item", - "orig-order": 220, - "page": 15, - "span": [ - 0, - 145 + [ + "language", + 6016885898370676469, + "TEXT", + "#/texts/171", + "en", + 0.7 ], - "sref": "#/page-elements/222", - "text-order": 222, - "type": "paragraph" - }, - { - "bbox": [ - 44.7877197265625, - 630.52294921875, - 548.82861328125, - 651.5768432617188 + [ + "semantic", + 6016885898370676469, + "TEXT", + "#/texts/171", + "reference", + 0.93 ], - "iref": "#/texts/183", - "name": "list-item", - "orig-order": 221, - "page": 15, - "span": [ - 0, - 252 + [ + "language", + 13946275785662847920, + "TEXT", + "#/texts/172", + "en", + 0.62 ], - "sref": "#/page-elements/223", - "text-order": 223, - "type": "paragraph" - }, - { - "bbox": [ - 44.787750244140625, - 607.5050048828125, - 550.8438720703125, - 628.0836181640625 + [ + "semantic", + 13946275785662847920, + "TEXT", + "#/texts/172", + "reference", + 0.98 ], - "iref": "#/texts/184", - "name": "list-item", - "orig-order": 222, - "page": 15, - "span": [ - 0, - 147 + [ + "language", + 7693798302433367973, + "TEXT", + "#/texts/173", + "en", + 0.5 ], - "sref": "#/page-elements/224", - "text-order": 224, - "type": "paragraph" - }, - { - "bbox": [ - 44.787750244140625, - 595.9960327148438, - 474.9829406738281, - 604.6593627929688 + [ + "semantic", + 7693798302433367973, + "TEXT", + "#/texts/173", + "reference", + 0.97 ], - "iref": "#/texts/185", - "name": "list-item", - "orig-order": 223, - "page": 15, - "span": [ - 0, - 114 + [ + "language", + 3109792572574236398, + "TEXT", + "#/texts/174", + "en", + 0.68 + ], + [ + "semantic", + 3109792572574236398, + "TEXT", + "#/texts/174", + "reference", + 0.94 ], - "sref": "#/page-elements/225", - "text-order": 225, - "type": "paragraph" - }, - { - "bbox": [ - 44.786895751953125, - 573.0350341796875, - 548.8020629882812, - 592.54248046875 + [ + "language", + 8111170387462350170, + "TEXT", + "#/texts/175", + "en", + 0.75 ], - "iref": "#/texts/186", - "name": "list-item", - "orig-order": 224, - "page": 15, - "span": [ - 0, - 197 + [ + "semantic", + 8111170387462350170, + "TEXT", + "#/texts/175", + "reference", + 0.83 ], - "sref": "#/page-elements/226", - "text-order": 226, - "type": "paragraph" - }, - { - "bbox": [ - 44.786865234375, - 550.01708984375, - 548.7230834960938, - 569.8275146484375 + [ + "language", + 14682702346227170925, + "TEXT", + "#/texts/176", + "en", + 0.49 ], - "iref": "#/texts/187", - "name": "list-item", - "orig-order": 225, - "page": 15, - "span": [ - 0, - 142 + [ + "semantic", + 14682702346227170925, + "TEXT", + "#/texts/176", + "reference", + 0.92 ], - "sref": "#/page-elements/227", - "text-order": 227, - "type": "paragraph" - }, - { - "bbox": [ - 44.78601837158203, - 526.9991455078125, - 550.565185546875, - 546.7464599609375 + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/177", + "en", + 0.78 ], - "iref": "#/texts/188", - "name": "list-item", - "orig-order": 226, - "page": 15, - "span": [ - 0, - 176 + [ + "semantic", + 18391264192891079539, + "TEXT", + "#/texts/177", + "text", + 0.52 ], - "sref": "#/page-elements/228", - "text-order": 228, - "type": "paragraph" - }, - { - "bbox": [ - 57.16337966918945, - 468.5407409667969, - 529.73583984375, - 491.138916015625 + [ + "language", + 11430385775112165283, + "TEXT", + "#/texts/178", + "en", + 0.67 ], - "iref": "#/texts/189", - "name": "text", - "orig-order": 227, - "page": 15, - "span": [ - 0, - 216 + [ + "semantic", + 11430385775112165283, + "TEXT", + "#/texts/178", + "reference", + 0.99 ], - "sref": "#/page-elements/229", - "text-order": 229, - "type": "paragraph" - }, - { - "bbox": [ - 578.368896484375, - 15.450490951538086, - 583.4779663085938, - 766.7100219726562 + [ + "language", + 5825495964576843004, + "TEXT", + "#/texts/179", + "en", + 0.49 ], - "iref": "#/texts/190", - "name": "text", - "orig-order": 230, - "page": 15, - "span": [ - 0, - 320 + [ + "semantic", + 5825495964576843004, + "TEXT", + "#/texts/179", + "reference", + 0.97 ], - "sref": "#/page-elements/230", - "text-order": 230, - "type": "paragraph" - } - ], - "page-footers": [ - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/0", - "hash": 12400883656433726216, - "orig": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "prov": [ - { - "$ref": "#/page-elements/21" - } + [ + "language", + 5698421097735371040, + "TEXT", + "#/texts/180", + "en", + 0.31 ], - "text": "Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "text-hash": 8372141692634509619, - "type": "page-footer" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-footers/1", - "hash": 10244115652970867690, - "orig": "wileyonlinelibrary.com/journal/ail2 1of15", - "prov": [ - { - "$ref": "#/page-elements/22" - } + [ + "semantic", + 5698421097735371040, + "TEXT", + "#/texts/180", + "reference", + 0.92 ], - "text": "wileyonlinelibrary.com/journal/ail2 1of15", - "text-hash": 6196517219334265105, - "type": "page-footer" - } - ], - "page-headers": [ - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/0", - "hash": 1841431076736563689, - "orig": "Received: 15 September 2020", - "prov": [ - { - "$ref": "#/page-elements/0" - } + [ + "language", + 5870535063942256428, + "TEXT", + "#/texts/181", + "en", + 0.44 ], - "text": "Received: 15 September 2020", - "text-hash": 16688788223092401940, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/1", - "hash": 3915126318503464014, - "orig": "Revised: 23 November 2020", - "prov": [ - { - "$ref": "#/page-elements/1" - } + [ + "semantic", + 5870535063942256428, + "TEXT", + "#/texts/181", + "reference", + 0.79 ], - "text": "Revised: 23 November 2020", - "text-hash": 1000711515083668085, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/2", - "hash": 1727876228376027809, - "orig": "Accepted: 25 November 2020", - "prov": [ - { - "$ref": "#/page-elements/2" - } + [ + "language", + 18196767266655606709, + "TEXT", + "#/texts/182", + "en", + 0.68 ], - "text": "Accepted: 25 November 2020", - "text-hash": 17099649843681009628, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/3", - "hash": 4558221577189246496, - "orig": "DOI: 10.1002/ail2.20", - "prov": [ - { - "$ref": "#/page-elements/3" - } + [ + "semantic", + 18196767266655606709, + "TEXT", + "#/texts/182", + "reference", + 0.99 ], - "text": "DOI: 10.1002/ail2.20", - "text-hash": 348625343742526555, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/4", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/24" - } + [ + "language", + 3623403683642367845, + "TEXT", + "#/texts/183", + "en", + 0.44 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/5", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/43" - } + [ + "semantic", + 3623403683642367845, + "TEXT", + "#/texts/183", + "reference", + 0.93 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/6", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/55" - } + [ + "language", + 13936866850854297069, + "TEXT", + "#/texts/184", + "en", + 0.59 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/7", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/66" - } + [ + "semantic", + 13936866850854297069, + "TEXT", + "#/texts/184", + "reference", + 0.94 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/8", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/76" - } + [ + "language", + 8497015665124263236, + "TEXT", + "#/texts/185", + "en", + 0.4 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/9", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/92" - } + [ + "semantic", + 8497015665124263236, + "TEXT", + "#/texts/185", + "reference", + 1.0 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/10", - "hash": 4361549266732238272, - "orig": "8of15", - "prov": [ - { - "$ref": "#/page-elements/106" - } + [ + "language", + 15947529491299956047, + "TEXT", + "#/texts/186", + "en", + 0.62 ], - "text": "8of15", - "text-hash": 329104147727696635, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/11", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/107" - } + [ + "semantic", + 15947529491299956047, + "TEXT", + "#/texts/186", + "reference", + 0.99 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/12", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/117" - } + [ + "language", + 14843401725435831033, + "TEXT", + "#/texts/187", + "en", + 0.62 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/13", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/131" - } + [ + "semantic", + 14843401725435831033, + "TEXT", + "#/texts/187", + "reference", + 0.98 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/14", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/145" - } + [ + "language", + 16676439669743530711, + "TEXT", + "#/texts/188", + "en", + 0.55 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/15", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/156" - } + [ + "semantic", + 16676439669743530711, + "TEXT", + "#/texts/188", + "reference", + 0.99 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/16", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/170" - } + [ + "language", + 2986547206451163051, + "TEXT", + "#/texts/189", + "en", + 0.55 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/17", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/183" - } + [ + "semantic", + 2986547206451163051, + "TEXT", + "#/texts/189", + "reference", + 0.75 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - }, - { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/page-headers/18", - "hash": 8492015887072434396, - "orig": "STAAR ET AL.", - "prov": [ - { - "$ref": "#/page-elements/216" - } + [ + "language", + 18391264192891079539, + "TEXT", + "#/texts/190", + "en", + 0.78 ], - "text": "STAAR ET AL.", - "text-hash": 14658966106383255015, - "type": "page-header" - } - ], - "properties": { - "data": [ [ "semantic", + 18391264192891079539, + "TEXT", + "#/texts/190", "text", - 0.8700000047683716 + 0.52 ] ], "headers": [ "type", + "subj_hash", + "subj_name", + "subj_path", "label", "confidence" ] }, + "sref": "#", + "subj_hash": 18446744073709551615, "tables": [ { "#-cols": 6, @@ -18419,28 +85245,29 @@ "captions": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/146", - "hash": 8669048055071941045, "orig": "TABLE 1 Top-k accuracies validation of KG query results. Numbers represent the fraction in which any of the k highest ranked answers matches the expected answer", "prov": [ { "$ref": "#/page-elements/175" } ], + "sref": "#/tables/0/captions/0", + "subj_hash": 8669048055071941045, "text": "TABLE 1 Top-k accuracies validation of KG query results. Numbers represent the fraction in which any of the k highest ranked answers matches the expected answer", - "text-hash": 14400864471075544784, + "text_hash": 14400864471075544784, "type": "caption" } ], - "confidence": 0.9599999785423279, + "confidence": 0.96, "created_by": "high_conf_pred", "data": [ [ { "bbox": [ - 212.76950073242188, - 485.32318115234375, - 228.2480010986328, - 493.3896789550781 + 212.77, + 485.32, + 228.25, + 493.39 ], "col": 0, "col-header": false, @@ -18465,10 +85292,10 @@ }, { "bbox": [ - 280.4609375, - 485.32318115234375, - 315.0389404296875, - 493.3896789550781 + 280.46, + 485.32, + 315.04, + 493.39 ], "col": 1, "col-header": false, @@ -18493,10 +85320,10 @@ }, { "bbox": [ - 352.3488464355469, - 485.32318115234375, - 374.287353515625, - 493.3896789550781 + 352.35, + 485.32, + 374.29, + 493.39 ], "col": 2, "col-header": false, @@ -18521,10 +85348,10 @@ }, { "bbox": [ - 408.192138671875, - 485.32318115234375, - 430.1306457519531, - 493.3896789550781 + 408.19, + 485.32, + 430.13, + 493.39 ], "col": 3, "col-header": false, @@ -18549,10 +85376,10 @@ }, { "bbox": [ - 464.03546142578125, - 485.32318115234375, - 485.9739685058594, - 493.3896789550781 + 464.04, + 485.32, + 485.97, + 493.39 ], "col": 4, "col-header": false, @@ -18577,10 +85404,10 @@ }, { "bbox": [ - 519.8218383789062, - 485.32318115234375, - 541.7603149414062, - 493.3896789550781 + 519.82, + 485.32, + 541.76, + 493.39 ], "col": 5, "col-header": false, @@ -18607,10 +85434,10 @@ [ { "bbox": [ - 212.76950073242188, - 469.68743896484375, - 246.57400512695312, - 477.6859436035156 + 212.77, + 469.69, + 246.57, + 477.69 ], "col": 0, "col-header": false, @@ -18635,10 +85462,10 @@ }, { "bbox": [ - 280.4617919921875, - 469.68743896484375, - 294.4443054199219, - 477.6859436035156 + 280.46, + 469.69, + 294.44, + 477.69 ], "col": 1, "col-header": false, @@ -18663,10 +85490,10 @@ }, { "bbox": [ - 352.3488464355469, - 469.68743896484375, - 366.849853515625, - 477.6859436035156 + 352.35, + 469.69, + 366.85, + 477.69 ], "col": 2, "col-header": false, @@ -18691,10 +85518,10 @@ }, { "bbox": [ - 408.192138671875, - 469.68743896484375, - 422.6931457519531, - 477.6859436035156 + 408.19, + 469.69, + 422.69, + 477.69 ], "col": 3, "col-header": false, @@ -18719,10 +85546,10 @@ }, { "bbox": [ - 464.0354309082031, - 469.68743896484375, - 478.53643798828125, - 477.6859436035156 + 464.04, + 469.69, + 478.54, + 477.69 ], "col": 4, "col-header": false, @@ -18747,10 +85574,10 @@ }, { "bbox": [ - 519.82177734375, - 469.68743896484375, - 534.32275390625, - 477.6859436035156 + 519.82, + 469.69, + 534.32, + 477.69 ], "col": 5, "col-header": false, @@ -18800,10 +85627,10 @@ }, { "bbox": [ - 280.4609375, - 454.6636962890625, - 318.4134521484375, - 462.6622009277344 + 280.46, + 454.66, + 318.41, + 462.66 ], "col": 1, "col-header": false, @@ -18828,10 +85655,10 @@ }, { "bbox": [ - 352.3488464355469, - 454.6636962890625, - 366.849853515625, - 462.6622009277344 + 352.35, + 454.66, + 366.85, + 462.66 ], "col": 2, "col-header": false, @@ -18856,10 +85683,10 @@ }, { "bbox": [ - 408.192138671875, - 454.6636962890625, - 422.6931457519531, - 462.6622009277344 + 408.19, + 454.66, + 422.69, + 462.66 ], "col": 3, "col-header": false, @@ -18884,10 +85711,10 @@ }, { "bbox": [ - 464.0354309082031, - 454.6636962890625, - 478.53643798828125, - 462.6622009277344 + 464.04, + 454.66, + 478.54, + 462.66 ], "col": 4, "col-header": false, @@ -18912,10 +85739,10 @@ }, { "bbox": [ - 519.82177734375, - 454.6636962890625, - 534.32275390625, - 462.6622009277344 + 519.82, + 454.66, + 534.32, + 462.66 ], "col": 5, "col-header": false, @@ -18965,10 +85792,10 @@ }, { "bbox": [ - 280.4609375, - 439.63995361328125, - 298.6849365234375, - 447.6384582519531 + 280.46, + 439.64, + 298.68, + 447.64 ], "col": 1, "col-header": false, @@ -18993,10 +85820,10 @@ }, { "bbox": [ - 352.3479919433594, - 439.63995361328125, - 366.8489990234375, - 447.6384582519531 + 352.35, + 439.64, + 366.85, + 447.64 ], "col": 2, "col-header": false, @@ -19021,10 +85848,10 @@ }, { "bbox": [ - 408.1912841796875, - 439.63995361328125, - 422.6922912597656, - 447.6384582519531 + 408.19, + 439.64, + 422.69, + 447.64 ], "col": 3, "col-header": false, @@ -19049,10 +85876,10 @@ }, { "bbox": [ - 464.0345764160156, - 439.63995361328125, - 478.53558349609375, - 447.6384582519531 + 464.03, + 439.64, + 478.54, + 447.64 ], "col": 4, "col-header": false, @@ -19077,10 +85904,10 @@ }, { "bbox": [ - 519.8209228515625, - 439.63995361328125, - 534.3218994140625, - 447.6384582519531 + 519.82, + 439.64, + 534.32, + 447.64 ], "col": 5, "col-header": false, @@ -19107,10 +85934,10 @@ [ { "bbox": [ - 212.7694854736328, - 424.67315673828125, - 227.3724822998047, - 432.6716613769531 + 212.77, + 424.67, + 227.37, + 432.67 ], "col": 0, "col-header": false, @@ -19135,10 +85962,10 @@ }, { "bbox": [ - 280.4617919921875, - 424.67315673828125, - 294.4443054199219, - 432.6716613769531 + 280.46, + 424.67, + 294.44, + 432.67 ], "col": 1, "col-header": false, @@ -19163,10 +85990,10 @@ }, { "bbox": [ - 352.3488464355469, - 424.67315673828125, - 366.849853515625, - 432.6716613769531 + 352.35, + 424.67, + 366.85, + 432.67 ], "col": 2, "col-header": false, @@ -19191,10 +86018,10 @@ }, { "bbox": [ - 408.192138671875, - 424.67315673828125, - 422.6931457519531, - 432.6716613769531 + 408.19, + 424.67, + 422.69, + 432.67 ], "col": 3, "col-header": false, @@ -19219,10 +86046,10 @@ }, { "bbox": [ - 464.0354309082031, - 424.67315673828125, - 478.53643798828125, - 432.6716613769531 + 464.04, + 424.67, + 478.54, + 432.67 ], "col": 4, "col-header": false, @@ -19247,10 +86074,10 @@ }, { "bbox": [ - 519.82177734375, - 424.67315673828125, - 534.32275390625, - 432.6716613769531 + 519.82, + 424.67, + 534.32, + 432.67 ], "col": 5, "col-header": false, @@ -19300,10 +86127,10 @@ }, { "bbox": [ - 280.4609375, - 409.6494140625, - 318.4134521484375, - 417.6479187011719 + 280.46, + 409.65, + 318.41, + 417.65 ], "col": 1, "col-header": false, @@ -19328,10 +86155,10 @@ }, { "bbox": [ - 352.3488464355469, - 409.6494140625, - 366.849853515625, - 417.6479187011719 + 352.35, + 409.65, + 366.85, + 417.65 ], "col": 2, "col-header": false, @@ -19356,10 +86183,10 @@ }, { "bbox": [ - 408.192138671875, - 409.6494140625, - 422.6931457519531, - 417.6479187011719 + 408.19, + 409.65, + 422.69, + 417.65 ], "col": 3, "col-header": false, @@ -19384,10 +86211,10 @@ }, { "bbox": [ - 464.0354309082031, - 409.6494140625, - 478.53643798828125, - 417.6479187011719 + 464.04, + 409.65, + 478.54, + 417.65 ], "col": 4, "col-header": false, @@ -19412,10 +86239,10 @@ }, { "bbox": [ - 519.82177734375, - 409.6494140625, - 534.32275390625, - 417.6479187011719 + 519.82, + 409.65, + 534.32, + 417.65 ], "col": 5, "col-header": false, @@ -19465,10 +86292,10 @@ }, { "bbox": [ - 280.4609375, - 394.6826171875, - 298.6849365234375, - 402.6811218261719 + 280.46, + 394.68, + 298.68, + 402.68 ], "col": 1, "col-header": false, @@ -19493,10 +86320,10 @@ }, { "bbox": [ - 352.3479919433594, - 394.6826171875, - 366.8489990234375, - 402.6811218261719 + 352.35, + 394.68, + 366.85, + 402.68 ], "col": 2, "col-header": false, @@ -19521,10 +86348,10 @@ }, { "bbox": [ - 408.1912841796875, - 394.6826171875, - 422.6922912597656, - 402.6811218261719 + 408.19, + 394.68, + 422.69, + 402.68 ], "col": 3, "col-header": false, @@ -19549,10 +86376,10 @@ }, { "bbox": [ - 464.0345764160156, - 394.6826171875, - 478.53558349609375, - 402.6811218261719 + 464.03, + 394.68, + 478.54, + 402.68 ], "col": 4, "col-header": false, @@ -19577,10 +86404,10 @@ }, { "bbox": [ - 519.8209228515625, - 394.6826171875, - 534.3218994140625, - 402.6811218261719 + 519.82, + 394.68, + 534.32, + 402.68 ], "col": 5, "col-header": false, @@ -19607,10 +86434,10 @@ [ { "bbox": [ - 212.7694854736328, - 379.65887451171875, - 236.92648315429688, - 387.6573791503906 + 212.77, + 379.66, + 236.93, + 387.66 ], "col": 0, "col-header": false, @@ -19635,10 +86462,10 @@ }, { "bbox": [ - 280.4617919921875, - 379.65887451171875, - 294.4443054199219, - 387.6573791503906 + 280.46, + 379.66, + 294.44, + 387.66 ], "col": 1, "col-header": false, @@ -19663,10 +86490,10 @@ }, { "bbox": [ - 352.3488464355469, - 379.65887451171875, - 366.849853515625, - 387.6573791503906 + 352.35, + 379.66, + 366.85, + 387.66 ], "col": 2, "col-header": false, @@ -19691,10 +86518,10 @@ }, { "bbox": [ - 408.192138671875, - 379.65887451171875, - 422.6931457519531, - 387.6573791503906 + 408.19, + 379.66, + 422.69, + 387.66 ], "col": 3, "col-header": false, @@ -19719,10 +86546,10 @@ }, { "bbox": [ - 464.0354309082031, - 379.65887451171875, - 478.53643798828125, - 387.6573791503906 + 464.04, + 379.66, + 478.54, + 387.66 ], "col": 4, "col-header": false, @@ -19747,10 +86574,10 @@ }, { "bbox": [ - 519.82177734375, - 379.65887451171875, - 534.32275390625, - 387.6573791503906 + 519.82, + 379.66, + 534.32, + 387.66 ], "col": 5, "col-header": false, @@ -19800,10 +86627,10 @@ }, { "bbox": [ - 280.4609375, - 364.69207763671875, - 318.4134521484375, - 372.6905822753906 + 280.46, + 364.69, + 318.41, + 372.69 ], "col": 1, "col-header": false, @@ -19828,10 +86655,10 @@ }, { "bbox": [ - 352.3488464355469, - 364.69207763671875, - 366.849853515625, - 372.6905822753906 + 352.35, + 364.69, + 366.85, + 372.69 ], "col": 2, "col-header": false, @@ -19856,10 +86683,10 @@ }, { "bbox": [ - 408.192138671875, - 364.69207763671875, - 422.6931457519531, - 372.6905822753906 + 408.19, + 364.69, + 422.69, + 372.69 ], "col": 3, "col-header": false, @@ -19884,10 +86711,10 @@ }, { "bbox": [ - 464.0354309082031, - 364.69207763671875, - 478.53643798828125, - 372.6905822753906 + 464.04, + 364.69, + 478.54, + 372.69 ], "col": 4, "col-header": false, @@ -19912,10 +86739,10 @@ }, { "bbox": [ - 519.82177734375, - 364.69207763671875, - 534.32275390625, - 372.6905822753906 + 519.82, + 364.69, + 534.32, + 372.69 ], "col": 5, "col-header": false, @@ -19965,10 +86792,10 @@ }, { "bbox": [ - 280.4609375, - 349.6683349609375, - 298.6849365234375, - 357.6668395996094 + 280.46, + 349.67, + 298.68, + 357.67 ], "col": 1, "col-header": false, @@ -19993,10 +86820,10 @@ }, { "bbox": [ - 352.3479919433594, - 349.6683349609375, - 366.8489990234375, - 357.6668395996094 + 352.35, + 349.67, + 366.85, + 357.67 ], "col": 2, "col-header": false, @@ -20021,10 +86848,10 @@ }, { "bbox": [ - 408.1912841796875, - 349.6683349609375, - 422.6922912597656, - 357.6668395996094 + 408.19, + 349.67, + 422.69, + 357.67 ], "col": 3, "col-header": false, @@ -20049,10 +86876,10 @@ }, { "bbox": [ - 464.0345764160156, - 349.6683349609375, - 478.53558349609375, - 357.6668395996094 + 464.03, + 349.67, + 478.54, + 357.67 ], "col": 4, "col-header": false, @@ -20077,10 +86904,10 @@ }, { "bbox": [ - 519.8209228515625, - 349.6683349609375, - 534.3218994140625, - 357.6668395996094 + 519.82, + 349.67, + 534.32, + 357.67 ], "col": 5, "col-header": false, @@ -20107,5172 +86934,2690 @@ ], "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/tables/0", "footnotes": [], - "hash": 12469893451248582632, "mentions": [], "prov": [ { "$ref": "#/page-elements/176" } ], + "sref": "#/tables/0", + "subj_hash": 12469893451248582632, "type": "table" } ], "texts": [ { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/0", - "hash": 2144509362215609527, "orig": "LETTER", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/4" } ], + "sref": "#/texts/0", + "subj_hash": 2144509362215609527, "text": "LETTER", - "text-hash": 16381206540184854990, + "text_hash": 16381206540184854990, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/1", - "hash": 16672720454366774824, "orig": "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/5" } ], + "sref": "#/texts/1", + "subj_hash": 16672720454366774824, "text": "Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora", - "text-hash": 4375081646508065875, + "text_hash": 4375081646508065875, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/2", - "hash": 16781763356419781679, "orig": "Peter W. J. Staar", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.6100000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/6" } ], + "sref": "#/texts/2", + "subj_hash": 16781763356419781679, "text": "Peter W. J. Staar", - "text-hash": 4049808513512976982, + "text_hash": 4049808513512976982, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/3", - "hash": 3352447812305581329, "orig": "|", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/7" } ], + "sref": "#/texts/3", + "subj_hash": 3352447812305581329, "text": "|", - "text-hash": 17767354399704232748, + "text_hash": 17767354399704232748, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/4", - "hash": 14877831450145300436, "orig": "Michele Dolfi", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/8" } ], + "sref": "#/texts/4", + "subj_hash": 14877831450145300436, "text": "Michele Dolfi", - "text-hash": 1571808557594152175, + "text_hash": 1571808557594152175, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/5", - "hash": 3352447812305581329, "orig": "|", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/9" } ], + "sref": "#/texts/5", + "subj_hash": 3352447812305581329, "text": "|", - "text-hash": 17767354399704232748, + "text_hash": 17767354399704232748, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/6", - "hash": 13336841394978214677, "orig": "Christoph Auer", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.5899999737739563 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/10" } ], + "sref": "#/texts/6", + "subj_hash": 13336841394978214677, "text": "Christoph Auer", - "text-hash": 9737597816447750448, + "text_hash": 9737597816447750448, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/7", - "hash": 15325526562897377208, "orig": "IBM Research, Rueschlikon, Switzerland", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/11" } ], + "sref": "#/texts/7", + "subj_hash": 15325526562897377208, "text": "IBM Research, Rueschlikon, Switzerland", - "text-hash": 3204757815416943811, + "text_hash": 3204757815416943811, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/8", - "hash": 4017434568255781081, "orig": "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland. Email: taa@zurich.ibm.com", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/12" } ], + "sref": "#/texts/8", + "subj_hash": 4017434568255781081, "text": "Correspondence Peter W. J. Staar, IBM Research, Saumerstrasse 4, 8820 Rueschlikon, Switzerland. Email: taa@zurich.ibm.com", - "text-hash": 961470147553945060, + "text_hash": 961470147553945060, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/9", - "hash": 8487024695951375934, "orig": "Abstract", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/13" } ], + "sref": "#/texts/9", + "subj_hash": 8487024695951375934, "text": "Abstract", - "text-hash": 14650447666970618949, + "text_hash": 14650447666970618949, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/10", - "hash": 11695737263227886476, "orig": "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data. Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world. Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries. In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS). Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried. To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform. This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities. Both components are tightly integrated and can be easily consumed through REST APIs. Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach. The CPS platform is designed as a modular microservice system operating on Kubernetes clusters. Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/14" } ], + "sref": "#/texts/10", + "subj_hash": 11695737263227886476, "text": "Knowledge Graphs have been fast emerging as the de facto standard to model and explore knowledge in weakly structured data. Large corpora of documents constitute a source of weakly structured data of particular interest for both the academic and business world. Key examples include scientific publications, technical reports, manuals, patents, regulations, etc. Such corpora embed many facts that are elementary to critical decision making or enabling new discoveries. In this paper, we present a scalable cloud platform to create and serve Knowledge Graphs, which we named corpus processing service (CPS). Its purpose is to process large document corpora, extract the content and embedded facts, and ultimately represent these in a consistent knowledge graph that can be intuitively queried. To accomplish this, we use state-of-the-art natural language understanding models to extract entities and relationships from documents converted with our previously presented corpus conversion service platform. This pipeline is complemented with a newly developed graph engine which ensures extremely performant graph queries and provides powerful graph analytics capabilities. Both components are tightly integrated and can be easily consumed through REST APIs. Additionally, we provide user interfaces to control the data ingestion flow and formulate queries using a visual programming approach. The CPS platform is designed as a modular microservice system operating on Kubernetes clusters. Finally, we validate the quality of queries on our endto-end knowledge pipeline in a real-world application in the oil and gas industry.", - "text-hash": 9356514212507371703, + "text_hash": 9356514212507371703, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/11", - "hash": 8500733160758672230, "orig": "KEYWORDS", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/15" } ], + "sref": "#/texts/11", + "subj_hash": 8500733160758672230, "text": "KEYWORDS", - "text-hash": 14650267244735310237, + "text_hash": 14650267244735310237, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/12", - "hash": 4452030907228745864, "orig": "document processing, knowledge graph, semantic search", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8700000047683716 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/16" } ], + "sref": "#/texts/12", + "subj_hash": 4452030907228745864, "text": "document processing, knowledge graph, semantic search", - "text-hash": 243147861724212659, + "text_hash": 243147861724212659, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/13", - "hash": 11913688961435238004, "orig": "1 | INTRODUCTION", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/17" } ], + "sref": "#/texts/13", + "subj_hash": 11913688961435238004, "text": "1 | INTRODUCTION", - "text-hash": 8854903187485535375, + "text_hash": 8854903187485535375, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/14", - "hash": 9977041563469582014, "orig": "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally. It is self-evident that this number has increased ever since. The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world. The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings. Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/18" } ], + "sref": "#/texts/14", + "subj_hash": 9977041563469582014, "text": "As of 2015, Adobe estimated that there were 2.7 trillion PDF documents in circulation globally. It is self-evident that this number has increased ever since. The explosive growth of documents one can observe since digital publishing became mainstream is posing a serious challenge to both the academic and corporate world. The increased publication rate of scientific articles makes it harder and harder for academics to keep aware of all the latest findings. Similarly, the ever-growing number of internal reports, documentation, patents, contracts, regulations, court filings, etc., is for most corporations becoming simply unmanageable.", - "text-hash": 6468010182398147525, + "text_hash": 6468010182398147525, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/15", - "hash": 4361549266817300114, "orig": "2of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/23" } ], + "sref": "#/texts/15", + "subj_hash": 4361549266817300114, "text": "2of15", - "text-hash": 329104147827159977, + "text_hash": 329104147827159977, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/16", - "hash": 8425126282903547933, "orig": "In a previous publication, we presented the corpus conversion service (CCS). 1 The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content. As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible. Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date. ' The second problem is to obviously search or explore the content of the documents in a large corpus. For this problem, we have developed the corpus processing service (CPS), which we present in this paper. The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores. As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/25" } ], + "sref": "#/texts/16", + "subj_hash": 8425126282903547933, "text": "In a previous publication, we presented the corpus conversion service (CCS). 1 The CCS is a scalable cloud service, which leverages state-of-the-art machine learning to convert complex formats (eg, PDF, Word, and Bitmap) into a richly structured JSON representation of their content. As such, the CCS solves the first problem when confronted with a large corpus of documents, that is, make the content of the documents programmatically accessible. Examples of the latter would be ' List all images with their caption from the corpus or list all titles with their publication date. ' The second problem is to obviously search or explore the content of the documents in a large corpus. For this problem, we have developed the corpus processing service (CPS), which we present in this paper. The CPS is intended to create knowledge bases (KBs) from the converted JSON corpus and serve these KBs through in-memory knowledge graph stores. As such, the CPS is the natural extension of the CCS and has as an express purpose to make corpora of documents available for deep data exploration.", - "text-hash": 14716796829201051176, + "text_hash": 14716796829201051176, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/17", - "hash": 16507313240019459642, "orig": "The purpose of CPS is to enable deep data exploration directly on large corpora. Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making. It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve. Although search is by no means trivial, many state-of-the art solutions exist for this purpose. * We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration. Those datasets can then be used for further anal-", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/26" } ], + "sref": "#/texts/17", + "subj_hash": 16507313240019459642, "text": "The purpose of CPS is to enable deep data exploration directly on large corpora. Here, we define deep data exploration as the capability to ingest large corpora of documents into a scalable service and detect, extract and combine facts contained in these corpora in order to make new discoveries or support critical decision making. It is key to understand that our goal of creating and querying Knowledge Graphs to enable deep data exploration goes beyond search in the spirit of rank and retrieve. Although search is by no means trivial, many state-of-the art solutions exist for this purpose. * We argue, however, that one needs query capabilities which allow for a combination of extracted facts and a fast, onthe-fly creation of new datasets to enable actual deep data exploration. Those datasets can then be used for further anal-", - "text-hash": 4261190952114998337, + "text_hash": 4261190952114998337, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/18", - "hash": 7900229969942228522, "orig": "ysis, which might lead to new discoveries or support decision making.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/27" } ], + "sref": "#/texts/18", + "subj_hash": 7900229969942228522, "text": "ysis, which might lead to new discoveries or support decision making.", - "text-hash": 12931323242585971793, + "text_hash": 12931323242585971793, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/19", - "hash": 10081303962589804251, "orig": "To better distinguish this approach from conventional search, let us consider some example questions:", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/28" } ], + "sref": "#/texts/19", + "subj_hash": 10081303962589804251, "text": "To better distinguish this approach from conventional search, let us consider some example questions:", - "text-hash": 6426882630003520482, + "text_hash": 6426882630003520482, "type": "paragraph" }, { - "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/20", - "hash": 12186698460099365002, - "orig": "a. Definition of high temperature superconductor.", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.49000000953674316 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, + "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/20", + "orig": "a. Definition of high temperature superconductor.", "prov": [ { "$ref": "#/page-elements/29" } ], + "sref": "#/texts/20", + "subj_hash": 12186698460099365002, "text": "a. Definition of high temperature superconductor.", - "text-hash": 8586326920090596785, + "text_hash": 8586326920090596785, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/21", - "hash": 14190244699299580163, "orig": "b. Publications of before year 2010.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/30" } ], + "sref": "#/texts/21", + "subj_hash": 14190244699299580163, "text": "b. Publications of before year 2010.", - "text-hash": 2034196463390881594, + "text_hash": 2034196463390881594, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/22", - "hash": 1376279050886549305, "orig": "c. Maps of the Permian basin.", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/31" } ], + "sref": "#/texts/22", + "subj_hash": 1376279050886549305, "text": "c. Maps of the Permian basin.", - "text-hash": 17379120122282474820, + "text_hash": 17379120122282474820, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/23", - "hash": 10155628801693924200, "orig": "d. Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/32" } ], + "sref": "#/texts/23", + "subj_hash": 10155628801693924200, "text": "d. Geological formations from the Miocene age with their depth, thickness, geographic location, and composition.", - "text-hash": 6073268612165724563, + "text_hash": 6073268612165724563, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/24", - "hash": 9107499507097280105, "orig": "e. List all high-Tc superconductors with their known crystallographic and material properties?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6100000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/33" } ], + "sref": "#/texts/24", + "subj_hash": 9107499507097280105, "text": "e. List all high-Tc superconductors with their known crystallographic and material properties?", - "text-hash": 14246074989165808788, + "text_hash": 14246074989165808788, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/25", - "hash": 7248467870339433322, "orig": "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions). Likewise, question (b) can be easily answered through metadata based filter rules on a literature database. Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done. Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source. These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/34" } ], + "sref": "#/texts/25", + "subj_hash": 7248467870339433322, "text": "Question (a) undoubtedly fits the classic search paradigm, since here one can expect a search engine to find a number sources with exact answers (ie, definitions). Likewise, question (b) can be easily answered through metadata based filter rules on a literature database. Question (c) already requires some extent of domain knowledge to be encoded in a model to accurately classify the relevance of all known maps to the query, at least assuming no manual curation effort has been done. Questions (d) and (e) ultimately impose query capabilities which are clearly infeasible to support through manual curation, and are very unlikely to be answered in any single data source. These questions require the system to return a more complex data structure (eg, a table in which the rows list the formations or materials while the columns contain their respective properties).", - "text-hash": 13592184899010298257, + "text_hash": 13592184899010298257, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/26", - "hash": 13346892078888080449, "orig": "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/35" } ], + "sref": "#/texts/26", + "subj_hash": 13346892078888080449, "text": "Concluding from the above examples, we define the following qualifying criteria for a system that supports deep data exploration on corpora:", - "text-hash": 9732050976592056956, + "text_hash": 9732050976592056956, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/27", - "hash": 1118972765223422660, "orig": "1. It can answer queries by combining different data elements from different sources into a new data structure.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8299999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/36" } ], + "sref": "#/texts/27", + "subj_hash": 1118972765223422660, "text": "1. It can answer queries by combining different data elements from different sources into a new data structure.", - "text-hash": 15389200666968750079, + "text_hash": 15389200666968750079, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/28", - "hash": 324023167304456371, "orig": "2. It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way. It may profit from, but not require any manually curated data.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/37" } ], + "sref": "#/texts/28", + "subj_hash": 324023167304456371, "text": "2. It supports (1) by creating a knowledge model from a controlled, unstructured corpus in a mostly unsupervised way. It may profit from, but not require any manually curated data.", - "text-hash": 15837385157674255818, + "text_hash": 15837385157674255818, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/29", - "hash": 4651508276868765576, "orig": "3. It may restrict supported queries to a specific domain (eg, a technical field).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/38" } ], + "sref": "#/texts/29", + "subj_hash": 4651508276868765576, "text": "3. It may restrict supported queries to a specific domain (eg, a technical field).", - "text-hash": 11572955042484278451, + "text_hash": 11572955042484278451, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/30", - "hash": 3052020526349962744, "orig": "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components. The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them. The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time. 2 This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/39" } ], + "sref": "#/texts/30", + "subj_hash": 3052020526349962744, "text": "To meet the objectives defined earlier, CPS implements and tightly integrates two essential components. The first component is a scalable Knowledge Graph creation pipeline, which is used to automatically process text, tables and images through state-of-the-art segmentation and natural language understanding (NLU) models and extract entities and relationships from them. The second component serves the created KG, enabling users to perform deep queries and advanced graph analytics in real time. 2 This is supported through an underlying, highly optimized graph engine we developed to specifically address requirements for deep data exploration.", - "text-hash": 18009286910191614723, + "text_hash": 18009286910191614723, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/31", - "hash": 6725501529910185390, "orig": "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries. 4", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/40" } ], + "sref": "#/texts/31", + "subj_hash": 6725501529910185390, "text": "It is worth noting that the CPS platform is a fully functioning cloud application that has been successfully deployed in multiple real-world scenarios in material science 3 and oil and gas industries. 4", - "text-hash": 11737175762912836309, + "text_hash": 11737175762912836309, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/32", - "hash": 14814111183601762276, "orig": "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS. In section 2, we present in depth how the platform extracts facts from corpora at a massive scale. In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/41" } ], + "sref": "#/texts/32", + "subj_hash": 14814111183601762276, "text": "In the remainder of this paper, we discuss in detail the technical aspects and implementation details of the two main components of the CPS. In section 2, we present in depth how the platform extracts facts from corpora at a massive scale. In section 3, we go into detail of designing deep queries and show how we compute them in a very efficient", - "text-hash": 1414786465877142815, + "text_hash": 1414786465877142815, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/33", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/42" } ], + "sref": "#/texts/33", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/34", - "hash": 4361549266681704196, "orig": "3of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/44" } ], + "sref": "#/texts/34", + "subj_hash": 4361549266681704196, "text": "3of15", - "text-hash": 329104147711745343, + "text_hash": 329104147711745343, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/35", - "hash": 8043608144162608258, "orig": "way with our high-performance graph engine. Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud. Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/45" } ], + "sref": "#/texts/35", + "subj_hash": 8043608144162608258, "text": "way with our high-performance graph engine. Later, in section 4, we will discuss in detail how both components are deployed and interacting on the cloud. Finally, in section 5, we present the complete system in a real world case study and benchmark its accuracy.", - "text-hash": 13076251584287625657, + "text_hash": 13076251584287625657, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/36", - "hash": 7159467829896778939, "orig": "2 | SCALABLE KNOWLEDGE GRAPH CREATION", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.75 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/46" } ], + "sref": "#/texts/36", + "subj_hash": 7159467829896778939, "text": "2 | SCALABLE KNOWLEDGE GRAPH CREATION", - "text-hash": 13901790948575121858, + "text_hash": 13901790948575121858, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/37", - "hash": 5617240156952377, "orig": "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges. Entities can have a wide variety of types. A basic scenario includes types such as documents, document components, keywords, and authors. In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration. Relationships in the KG are strictly defined between the entities. Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age '). Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/47" } ], + "sref": "#/texts/37", + "subj_hash": 5617240156952377, "text": "In CPS, a Knowledge Graph is defined as a collection of entities and their relationships forming the graphs nodes and edges. Entities can have a wide variety of types. A basic scenario includes types such as documents, document components, keywords, and authors. In addition, there can be more specific types tied to domain verticals, such as materials and properties in material science, or geological ages, formations, rocks, minerals, structures, etc., for oil and gas exploration. Relationships in the KG are strictly defined between the entities. Similar to the entities, the relationships are typed (' has-material-property ' or ' has-geological-age '). Also, relationships in the KG can be weighted, for example, to represent the trustworthiness of a fact that the relationship represents.", - "text-hash": 16151270992855323972, + "text_hash": 16151270992855323972, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/38", - "hash": 3276490574487379366, "orig": "In typical cases, we start from a collection of documents in different formats. Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations. The latter are very often scanned or programmatic PDF documents. Using the CCS, 1 these types of documents are converted into structured JSON files. Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body. The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions. O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation. The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG). We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/48" } ], + "sref": "#/texts/38", + "subj_hash": 3276490574487379366, "text": "In typical cases, we start from a collection of documents in different formats. Sometimes, documents are available in semistructured, machine-interpretable formatssuchasJSON,XML,orHTML.However,inthevastmajority of cases this does not apply, especially for proprietary documents of companies and organizations. The latter are very often scanned or programmatic PDF documents. Using the CCS, 1 these types of documents are converted into structured JSON files. Those provide easy access to the meta-data (eg, title, abstract, references, authors) and the document body. The latter is structured by subtitles (of various levels), paragraphs, lists, tables (with internal row and column structures), figures, and linked captions. O n c et h ec o r p u si sp r e s n ti nas t r u c t u r e d,m a c h i n e processableformat,theKGiscreatedbyapplyingthreedistincttasks,namely extraction, annotation,and aggregation. The inherent dependencies between these three tasks are defined through a directed acyclic graph (DAG). We willrefertothisDAGoftasksasadataflow(DF).Inthenextsections,weestablishtheconceptofDFsanddiscuss the details for each DF task.", - "text-hash": 17496609193730656989, + "text_hash": 17496609193730656989, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/39", - "hash": 3367451956962330174, "orig": "2.1 | DF tasks", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/49" } ], + "sref": "#/texts/39", + "subj_hash": 3367451956962330174, "text": "2.1 | DF tasks", - "text-hash": 17765848133863277637, + "text_hash": 17765848133863277637, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/40", - "hash": 5509744459704235873, "orig": "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG. We will use Figure1toillustratethepurposeandimplementationof each DF task.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/50" } ], + "sref": "#/texts/40", + "subj_hash": 5509744459704235873, "text": "In Figure 1, we sketch a minimal DF, in which each of the three tasks is used consecutively in order to generate entities and relationships for a generic KG. We will use Figure1toillustratethepurposeandimplementationof each DF task.", - "text-hash": 10647094536020604316, + "text_hash": 10647094536020604316, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/42", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/53" } ], + "sref": "#/texts/41", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/43", - "hash": 4361549176688508574, "orig": "4of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/54" } ], + "sref": "#/texts/42", + "subj_hash": 4361549176688508574, "text": "4of15", - "text-hash": 329104066308221861, + "text_hash": 329104066308221861, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/44", - "hash": 12374482891052873875, "orig": "2.1.1 | Extraction", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.5699999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/56" } ], + "sref": "#/texts/43", + "subj_hash": 12374482891052873875, "text": "2.1.1 | Extraction", - "text-hash": 8758905122433574314, + "text_hash": 8758905122433574314, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/45", - "hash": 2755397864153233778, "orig": "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents). During this process, new links are created which connect these newly generated data entities to their original source entity. Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/57" } ], + "sref": "#/texts/44", + "subj_hash": 2755397864153233778, "text": "In an extraction task, we generate new data entities (eg, document components) from an original set of source entities (eg, documents). During this process, new links are created which connect these newly generated data entities to their original source entity. Typical examples of such extraction tasks are the extraction of abstracts, paragraphs, tables, or figures from the structured document files.", - "text-hash": 18305914688852125577, + "text_hash": 18305914688852125577, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/46", - "hash": 4698316471746130896, "orig": "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud. We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database. Extraction tasks have no internal synchronization points.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/58" } ], + "sref": "#/texts/45", + "subj_hash": 4698316471746130896, "text": "From a scalability point of view, this task is embarrassingly parallel, which makes it extremely easy to implement on loosely interconnected environments such as a cloud. We simply iterate in parallel over all source entities in the backend database, extract the desired components and then insert those components as new data entities back into the database. Extraction tasks have no internal synchronization points.", - "text-hash": 11458501594938683627, + "text_hash": 11458501594938683627, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/47", - "hash": 11827267218358801841, "orig": "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result. For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/59" } ], + "sref": "#/texts/46", + "subj_hash": 11827267218358801841, "text": "One particular benefit of this task is to make the query capability on the Knowledge Graph more fine grained by being able to provide provenance information on the result. For example, this would let the user explore all the paragraphs, tables, or figures that embed a certain fact.", - "text-hash": 8932299863639200460, + "text_hash": 8932299863639200460, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/48", - "hash": 6297710299044869343, "orig": "2.1.2 | Annotation", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8299999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/60" } ], + "sref": "#/texts/47", + "subj_hash": 6297710299044869343, "text": "2.1.2 | Annotation", - "text-hash": 12444247655523627494, + "text_hash": 12444247655523627494, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/49", - "hash": 7158837349769150986, "orig": "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity. Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures. The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus. Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/61" } ], + "sref": "#/texts/48", + "subj_hash": 7158837349769150986, "text": "In the annotation task, we apply NLU methods to detect language entities and their relationships within a single data entity. Here, data entities can be as simple as a snippet of text (eg, a paragraph) or more complex structures such as tables or figures. The main goal of the annotation task is to obtain all relevant information from the data entity with regard to the domain of the corpus. Since different technical fields require different annotations, our annotation task is modular, allowing language entities to be annotated for material science, oil and gas, or more basic entities (eg, noun phrases, abbreviations, unit and values, etc.).", - "text-hash": 13902418307602972721, + "text_hash": 13902418307602972721, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/50", - "hash": 1150871476689677866, "orig": "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks. 5,6 We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models. A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper. However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/62" } ], + "sref": "#/texts/49", + "subj_hash": 1150871476689677866, "text": "From a technical perspective, the language entities are detected and annotated using multiple NLU methods, ranging from complex regular expressions \u2020 to LSTM networks. 5,6 We employ state-of-the-art NLU toolkits such as Spacy 7 or NLTK \u2021 to train and apply custom named entity recognition models. A detailed investigation of these NLU annotators unfortunately goes beyond of the scope of this paper. However, in Figure 2, we show the different types of named (geological) entities found in a paragraph by our oil and gas annotation model.", - "text-hash": 15370812655802342481, + "text_hash": 15370812655802342481, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/51", - "hash": 5163702913945903725, "orig": "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend. It is noteworthy here that relationships are stored as (weighted) links between two entity references. \u00a7 The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG. The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/63" } ], + "sref": "#/texts/50", + "subj_hash": 5163702913945903725, "text": "In Listing 1, we also show an excerpt of how the annotations (both language entities and relationships) are stored in the backend. It is noteworthy here that relationships are stored as (weighted) links between two entity references. \u00a7 The usage of references reduces data duplication and more importantly ensures that the relationships are always defined between two known entities in the KG. The latter simplifies the aggregation of the relationships significantly, since no new entities need to be created in the KG in order to aggregate the relationships (see section 2.1.4).", - "text-hash": 11348986383696847000, + "text_hash": 11348986383696847000, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/52", - "hash": 5462319091745771382, "orig": "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract. 8 The language entities here are all related to geological concepts in the domain of oil and gas exploration", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.5899999737739563 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/64" } ], + "sref": "#/texts/51", + "subj_hash": 5462319091745771382, "text": "FIGURE 2 Illustration of various detected language entities in a particularly rich snippet of an AAPG abstract. 8 The language entities here are all related to geological concepts in the domain of oil and gas exploration", - "text-hash": 11050304000116997517, + "text_hash": 11050304000116997517, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/53", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/65" } ], + "sref": "#/texts/52", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/54", - "hash": 958124839653591304, "orig": "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships. Note that relationships are typed (encoded in the field name) and weighted. The weight reflects the confidence of the language annotation model during extraction. Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/68" } ], + "sref": "#/texts/53", + "subj_hash": 958124839653591304, "text": "LISTING 1 Excerpt of the annotated abstract from an AAPG paper 8 with its original text and the detected entities and relationships. Note that relationships are typed (encoded in the field name) and weighted. The weight reflects the confidence of the language annotation model during extraction. Relationships are always defined on detected entities, and will therefore use references defining a link between two entities", - "text-hash": 15194258930241746739, + "text_hash": 15194258930241746739, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/55", - "hash": 1448405324616602032, "orig": "From a scaling perspective, this task is again embarrassingly parallel. Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity. We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources. Annotation tasks have no internal synchronization points. From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs. Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/69" } ], + "sref": "#/texts/54", + "subj_hash": 1448405324616602032, "text": "From a scaling perspective, this task is again embarrassingly parallel. Unlike the extraction task, the annotation task is not creating new data entities, but rather appending new data associated with an existing data entity. We simply apply the desired entity and relationship annotators on all document components (paragraphs, tables, etc.) in parallel by distributing the operations on all available compute resources. Annotation tasks have no internal synchronization points. From a corpus of about 100 000 documents, we typically extract about 3 million paragraphs. Assuming unlimited resources, the annotation task could be distributed to potentially 3 million independent workers.", - "text-hash": 17018759417884348107, + "text_hash": 17018759417884348107, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/56", - "hash": 2617775076168299948, "orig": "2.1.3 | Aggregation of entities", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/70" } ], + "sref": "#/texts/55", + "subj_hash": 2617775076168299948, "text": "2.1.3 | Aggregation of entities", - "text-hash": 18150799209915986647, + "text_hash": 18150799209915986647, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/57", - "hash": 13974986056043304735, "orig": "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in. In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction. This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated. In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components. These entity types could be for example materials and properties or geological formations and geological ages. The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times. From an implementation point of view, the aggregation task for entities is nontrivial. In distributed computing, it corresponds to a reduction operation. Our implementation distributes the iteration of the source elements among all available computational resources. The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size. The synchronization step is a simple atomic update into an existing (or a newly created) database object. The synchronization for updates from each worker task does not collide with the others.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/71" } ], + "sref": "#/texts/56", + "subj_hash": 13974986056043304735, "text": "The aggregation task for entities is similar to an extraction task, in the sense that we create new entities and link them each to the source they were mentioned in. In addition to extraction, the entity aggregation task also applies a similarity metric \u00b6 between the entities during extraction. This similarity metric will define if two entities refer to the same language concept and thus need to be represented by a single entity in the KG, rather than remaining separated. In Figure 1, we have illustrated the aggregation task for two types of entities across many different document components. These entity types could be for example materials and properties or geological formations and geological ages. The links connecting the new entities to their source entity are weighted according to the frequency of the match, that is, we set a higher weight if the language entity has been found multiple times. From an implementation point of view, the aggregation task for entities is nontrivial. In distributed computing, it corresponds to a reduction operation. Our implementation distributes the iteration of the source elements among all available computational resources. The aggregation is first performed in a local buffer, which is then synchronized with the backend database only when it reaches a maximum size. The synchronization step is a simple atomic update into an existing (or a newly created) database object. The synchronization for updates from each worker task does not collide with the others.", - "text-hash": 2253911354578933030, + "text_hash": 2253911354578933030, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/58", - "hash": 5985285694705576020, "orig": "2.1.4 | Aggregation of relationships", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8199999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/72" } ], + "sref": "#/texts/57", + "subj_hash": 5985285694705576020, "text": "2.1.4 | Aggregation of relationships", - "text-hash": 12765605759878485615, + "text_hash": 12765605759878485615, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/59", - "hash": 11235296141350659290, "orig": "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation. In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together. For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/73" } ], + "sref": "#/texts/58", + "subj_hash": 11235296141350659290, "text": "The aggregation of relationships introduces new links between the entities that were aggregated in the previous aggregation operation. In Figure 1, this task is depicted as the last operation, where entities with an annotated relationship are explicitly linked together. For example, we create an edge between the Egret-Hibernia Petroleum System and Jeanne D'Arc Basin from Listing 1.", - "text-hash": 7583169921155047905, + "text_hash": 7583169921155047905, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/60", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/74" } ], + "sref": "#/texts/59", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/61", - "hash": 4361549266576336732, "orig": "6of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/75" } ], + "sref": "#/texts/60", + "subj_hash": 4361549266576336732, "text": "6of15", - "text-hash": 329104147615819111, + "text_hash": 329104147615819111, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/62", - "hash": 5771309285006424458, "orig": "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation. Two independent document components could describe the same relationship between two entities. To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached. This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/77" } ], + "sref": "#/texts/61", + "subj_hash": 5771309285006424458, "text": "Similar to the aggregation of entities, the aggregation task for relationships is a reduction operation. Two independent document components could describe the same relationship between two entities. To minimize the synchronization lookup operation with the backend database, this task also utilizes a local buffer which accumulates the changes to be committed to the KG until the maximum size is reached. This approach allows to distribute the computation among all the source document components and performs very few blocking operations in the backend database.", - "text-hash": 12691372718925440689, + "text_hash": 12691372718925440689, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/63", - "hash": 5371685212527510397, "orig": "2.2 | Data flows", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/78" } ], + "sref": "#/texts/62", + "subj_hash": 5371685212527510397, "text": "2.2 | Data flows", - "text-hash": 11140938221338345864, + "text_hash": 11140938221338345864, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/64", - "hash": 7817257645383866853, "orig": "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG. When instantiating a DF, one has the possibility to define in a declarative way:", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/79" } ], + "sref": "#/texts/63", + "subj_hash": 7817257645383866853, "text": "The purpose of a DF is to provide an execution plan for the task types detailed above in a meaningful order to generate or update a specific KG. When instantiating a DF, one has the possibility to define in a declarative way:", - "text-hash": 12955841367339550496, + "text_hash": 12955841367339550496, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/65", - "hash": 2929626768872004841, "orig": "1. Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/80" } ], + "sref": "#/texts/64", + "subj_hash": 2929626768872004841, "text": "1. Which document components should be extracted from a converted corpus to form source entities (eg, extract all paragraphs, tables, figures and captions from the AAPG articles)?", - "text-hash": 17906500337671162388, + "text_hash": 17906500337671162388, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/66", - "hash": 15879756297712818143, "orig": "2. Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/81" } ], + "sref": "#/texts/65", + "subj_hash": 15879756297712818143, "text": "2. Which annotator model(s) to use on which type of source entity (eg, run the geology or material science annotators on paragraphs)?", - "text-hash": 2573988876245521638, + "text_hash": 2573988876245521638, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/67", - "hash": 16116531546352845311, "orig": "3. Which entity and relationship aggregations to perform on which set of annotated language entities?", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/82" } ], + "sref": "#/texts/66", + "subj_hash": 16116531546352845311, "text": "3. Which entity and relationship aggregations to perform on which set of annotated language entities?", - "text-hash": 2702000589258555142, + "text_hash": 2702000589258555142, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/68", - "hash": 9541434157786316356, "orig": "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology. Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases. We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/83" } ], + "sref": "#/texts/67", + "subj_hash": 9541434157786316356, "text": "The DFs can thus be seen as blueprints for processing the corpus into a defined graph topology. Notably, our implementation of DFs and their tasks retains the flexibility of processing not only source documents of a well-known data schema such as from CCS, but virtually any structure that can be transformed to a JSON representation, including data entities from precurated databases. We designed the CPS platform to support export and import of DFs on entirely new datasets without the burden of recreating it from scratch.", - "text-hash": 6610972392363355263, + "text_hash": 6610972392363355263, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/69", - "hash": 997682002692959482, "orig": "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel. The achievable level of parallelism changes throughout the execution. A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities. Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components. Any synchronization points thus can be pushed back into the aggregation tasks.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/84" } ], + "sref": "#/texts/68", + "subj_hash": 997682002692959482, "text": "Our backend engine can exploit the DAG defined through the DF to massively distribute the individual tasks on all compute resources, because independent branches of the DAG each containing a chain of tasks can execute in parallel. The achievable level of parallelism changes throughout the execution. A practical example is a DF which extracts paragraphs and abstracts from all documents in the corpus, then annotates them and finally aggregates all entities. Here, the extraction tasks are distributed only over all documents; then, in the annotation tasks, we increase the parallelism to all document components. Any synchronization points thus can be pushed back into the aggregation tasks.", - "text-hash": 15235788623540001281, + "text_hash": 15235788623540001281, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/70", - "hash": 11590138063543342276, "orig": "3 | DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8799999952316284 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/85" } ], + "sref": "#/texts/69", + "subj_hash": 11590138063543342276, "text": "3 | DEEP DATA EXPLORATION USING KNOWLEDGE GRAPHS", - "text-hash": 9254996552431571455, + "text_hash": 9254996552431571455, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/71", - "hash": 16380310806374538602, "orig": "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph. A deep data exploration requires two fundamental capabilities:", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/86" } ], + "sref": "#/texts/70", + "subj_hash": 16380310806374538602, "text": "We will now look into the requirements to perform deep data exploration on a populated Knowledge Graph. A deep data exploration requires two fundamental capabilities:", - "text-hash": 4676441280076073873, + "text_hash": 4676441280076073873, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/72", - "hash": 5393976293631695754, "orig": "1. perform deep queries on the graph, that is, queries that require multi-hop traversals and", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8799999952316284 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/87" } ], + "sref": "#/texts/71", + "subj_hash": 5393976293631695754, "text": "1. perform deep queries on the graph, that is, queries that require multi-hop traversals and", - "text-hash": 11127633169729292465, + "text_hash": 11127633169729292465, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/73", - "hash": 1988335831916069382, "orig": "2. perform graph analytics on the full graph or subsets of it on-the-fly.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6200000047683716 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/88" } ], + "sref": "#/texts/72", + "subj_hash": 1988335831916069382, "text": "2. perform graph analytics on the full graph or subsets of it on-the-fly.", - "text-hash": 16834701212347777085, + "text_hash": 16834701212347777085, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/74", - "hash": 5147764798816678886, "orig": "Deep queries are essential to dynamically combine independent facts together in the given query context. This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5). Graph analytics can further reveal hidden structure in the KG topology. Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8600000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/89" } ], + "sref": "#/texts/73", + "subj_hash": 5147764798816678886, "text": "Deep queries are essential to dynamically combine independent facts together in the given query context. This would apply for example to explorational queries aimed to characterize petroleum system elements, as detailed in our case study (see section 5). Graph analytics can further reveal hidden structure in the KG topology. Examples of advanced graphanalytical operations are page rank, node centralities, 9,10 node clustering, spectral analysis, and label propagation.", - "text-hash": 11297301064675504413, + "text_hash": 11297301064675504413, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/75", - "hash": 285583876932865368, "orig": "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth. This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases. Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources. 11,12 Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform. This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/90" } ], + "sref": "#/texts/74", + "subj_hash": 285583876932865368, "text": "Both deep queries and graph analytics have in common that they are inherently expensive to compute on conventional graph databases, due to a rapid expansion of the number of visited nodes as a function of the graph-traversal depth. This is a major obstacle in providing reasonable time-to-solution in the aforementioned cases. Virtually all established graph database products on the market today ** fall victim to this, as was also reported in multiple sources. 11,12 Due to the poor performance we observed with available graph databases, we developed a new graph engine for the CPS platform. This graph engine is able to execute advanced graph-analytics 2 as well as evaluate deep queries with multi-hop traversals on large graphs (>1B edges) extremely fast.", - "text-hash": 16231538415772072803, + "text_hash": 16231538415772072803, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/76", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/91" } ], + "sref": "#/texts/75", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/77", - "hash": 4361549257370278754, "orig": "7of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/93" } ], + "sref": "#/texts/76", + "subj_hash": 4361549257370278754, "text": "7of15", - "text-hash": 329104161989101977, + "text_hash": 329104161989101977, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/78", - "hash": 13183039880198077038, "orig": "In the remaining part of this section, we elaborate on our newly developed graph engine. In section 3.1, we discuss the implementation design. In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/94" } ], + "sref": "#/texts/77", + "subj_hash": 13183039880198077038, "text": "In the remaining part of this section, we elaborate on our newly developed graph engine. In section 3.1, we discuss the implementation design. In section 3.2, we discuss performance results and compare it to Neo4J. Later, in section 3.3, we will explain how the deep queries are formulated and evaluated in the graph engine.", - "text-hash": 10251595290936699029, + "text_hash": 10251595290936699029, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/79", - "hash": 13428900458866068249, "orig": "3.1 | Design of the graph engine", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/95" } ], + "sref": "#/texts/78", + "subj_hash": 13428900458866068249, "text": "3.1 | Design of the graph engine", - "text-hash": 9938197928077211940, + "text_hash": 9938197928077211940, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/80", - "hash": 1430911655724119030, "orig": "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices. 13,14 In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors. \u2020\u2020 The edges are therefore stored as a property of the node. In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/96" } ], + "sref": "#/texts/79", + "subj_hash": 1430911655724119030, "text": "In computer science, two prevalent implementation schemes for graphs have emerged, one using adjacency lists and one relying on adjacency matrices. 13,14 In the adjacency list format, every node is essentially an object which contains a set of indices representing its neighbors. \u2020\u2020 The edges are therefore stored as a property of the node. In the adjacency matrix approach, all nodes obtain an identifier (typically an unsigned integer) and the edges are stored as a list of nodeidentifier tuples.", - "text-hash": 17396562708416737549, + "text_hash": 17396562708416737549, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/81", - "hash": 13770706479324480755, "orig": "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra. 13 For example, consider the graph-traversal V ! A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W. This can be directly translated into linear algebra as", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/97" } ], + "sref": "#/texts/80", + "subj_hash": 13770706479324480755, "text": "It is commonly known that most graph operations can be translated into matrix-operations using linear algebra. 13 For example, consider the graph-traversal V ! A W, in which we start from a set of nodes V and traverse the edge A in order to obtain a new set of nodes W. This can be directly translated into linear algebra as", - "text-hash": 9596444718520353290, + "text_hash": 9596444718520353290, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/82", - "hash": 11165481757050847950, "orig": "w $^{!}$= Av ! with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/98" } ], + "sref": "#/texts/81", + "subj_hash": 11165481757050847950, "text": "w $^{!}$= Av ! with v $^{!}$$_{i}$= 1 if node i \\b V 0 if node i = 2 V , GLYPH \u00f0 1 \u00de", - "text-hash": 7657471412122468341, + "text_hash": 7657471412122468341, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/83", - "hash": 9572077971492738329, "orig": "and with A being the adjacency matrix representation of the edge A. Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals. For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/99" } ], + "sref": "#/texts/82", + "subj_hash": 9572077971492738329, "text": "and with A being the adjacency matrix representation of the edge A. Translating single graph-traversals into linear algebra operations significantly simplifies the job of deeper graph traversals. For example, to obtain the k-order neighborhood of node set V, one simply needs to evaluate Equation (1) k times recursively, as in", - "text-hash": 6656818579934057252, + "text_hash": 6656818579934057252, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/84", - "hash": 14951391138799557075, "orig": "w $^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av ! GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/100" } ], + "sref": "#/texts/83", + "subj_hash": 14951391138799557075, "text": "w $^{!}$= A$^{k}$v $^{!}$= AA \u2026 Av ! GLYPHGLYPH GLYPH GLYPH GLYPH GLYPH : \u00f0 2 \u00de", - "text-hash": 1498163960925914858, + "text_hash": 1498163960925914858, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/85", - "hash": 16602156009514813718, "orig": "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently. Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs. In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed. 15,16 Notably, most advanced graph-analytical operations can be formulated using SpMV operations. The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w ! is equal to v $^{!}$. In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/101" } ], + "sref": "#/texts/84", + "subj_hash": 16602156009514813718, "text": "Therefore, deep queries can be implemented efficiently as long as Equation (1) can be evaluated efficiently. Over the past decades, lots of research has been conducted in the High Performance Computing community on the acceleration and parallelization of Equation (1) in the context of graphs. In this context, the matrix A is sparse and the linear operation of Equation (1) is referred to as a sparse matrix vector multiplication (SpMV), for which highly optimized implementations have been developed. 15,16 Notably, most advanced graph-analytical operations can be formulated using SpMV operations. The most trivial case is page-rank, in which one recursively executes Equation (1) in combination with a renormalization until w ! is equal to v $^{!}$. In our previous work, 2 we have also shown in detail that advanced graph-analytical operations such as node centralities and spectral analysis of the graph can be done effectively with only SpMV operations.", - "text-hash": 4445641728881669933, + "text_hash": 4445641728881669933, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/86", - "hash": 7162849562576593449, "orig": "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.7900000214576721 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/102" } ], + "sref": "#/texts/85", + "subj_hash": 7162849562576593449, "text": "Since both deep queries and advanced graph analytics hugely benefit from a fast SpMV kernel, we have opted to design the graph engine in the CPS platform to work entirely with the adjacency matrix format.", - "text-hash": 13884895358995816532, + "text_hash": 13884895358995816532, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/87", - "hash": 15385417954505503552, "orig": "3.2 | Memory architecture and performance optimization", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/103" } ], + "sref": "#/texts/86", + "subj_hash": 15385417954505503552, "text": "3.2 | Memory architecture and performance optimization", - "text-hash": 3140380205981200763, + "text_hash": 3140380205981200763, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/88", - "hash": 10815650641518265876, "orig": "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages. The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals. However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics. Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes. In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance. Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution. In the adjacency matrix format, these problems are not encountered. The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV). It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list. 17", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9100000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/104" } ], + "sref": "#/texts/87", + "subj_hash": 10815650641518265876, "text": "Both adjacency lists and adjacency matrices-based graph implementations have specific advantages and disadvantages. The adjacency list format is very well suited for node-centric operations since it exploits data-locality for local graph operations, such as first order traversals. However, it proves suboptimal for global scale graph operations, which are required for deep queries and the advanced graph analytics. Here, one typically has to perform graph-traversals starting from many (or even all) nodes and accumulating the weight in the resulting nodes. In an adjacency list format, this often leads to many cache misses during execution, resulting in low performance. Furthermore, parallelizing global graph-traversals in the adjacency list format suffers significantly from concurrent write conflicts between threads during execution. In the adjacency matrix format, these problems are not encountered. The graph-traversals can be directly translated into a SpMV or even a sparse-matrix sparse-vector multiplication (SpMSpV). It has also been well established how to execute the SpMV effectively in a multithreaded fashion, and how to minimize cache-misses by applying a clever sorting of the tuples list. 17", - "text-hash": 7939832404963099695, + "text_hash": 7939832404963099695, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/89", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/105" } ], + "sref": "#/texts/88", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/91", - "hash": 12004249365408683930, "orig": "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3. We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges). Two important observations can be made. Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals. With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour. Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals. This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/110" } ], + "sref": "#/texts/89", + "subj_hash": 12004249365408683930, "text": "To illustrate the advantages of the adjacency matrix format for our needs, we show the time-to-solution (TTS) for queries with increasing order of traversals for Neo4J \u2021\u2021 and our graph engine in Figure 3. We computed a k-hop traversal query on the graph500 \u00a7\u00a7 (64M edges) and twitter-graph \u00b6\u00b6 (1.5B edges). Two important observations can be made. Firstly, our graph engine is able to run easily third, fourth, and even higher-order graph traversals. With Neo4J, this proves very difficult, as the TTS grows upwards of 1 hour. Secondly, our graph engine shows minimal variance in the TTS between all runs of the k-order graph-traversals. This is in stark contrast to Neo4J, where the TTS strongly depends on which node(s) one starts from.", - "text-hash": 9124629550221661345, + "text_hash": 9124629550221661345, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/92", - "hash": 7223381657047466215, "orig": "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO. This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory. In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO. We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers. Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes. *** This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$). Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/111" } ], + "sref": "#/texts/90", + "subj_hash": 7223381657047466215, "text": "Another big advantage of using the adjacency matrix format is that we can exploit advanced compression methods 18 such as CSR or blocked COO. This reduces significantly the memory footprint of the graph and allows bigger graphs to be hosted entirely in-memory. In our case, we have opted to represent the edges by blocked matrices of a fixed size, in which each block matrix is of type COO. We chose the size of the block-matrix to be 2 16 = 65 536, allowing a pair of indices to be compactly represented by two unsigned short integers. Consequently, an edge has a memory footprint of only 4 bytes (equivalent to a single 32-bit integer), while a weighted edge a footprint of 8 bytes. *** This is a significant reduction in memory footprint compared to Neo4J graph databases, which use 33 bytes for unweighted edges $^{\u2020\u2020\u2020}$). Consequently, we can host graphs of close to 8 billion edges on a virtual machine with 32 GB of free memory, and even close to one trillion edges on a bare-metal POWER9 node with 4 TB of memory.", - "text-hash": 13549646715324792350, + "text_hash": 13549646715324792350, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/93", - "hash": 15132906055887224772, "orig": "3.3 | Formulation and evaluation of deep queries", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.7099999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/112" } ], + "sref": "#/texts/91", + "subj_hash": 15132906055887224772, "text": "3.3 | Formulation and evaluation of deep queries", - "text-hash": 3609048564712975615, + "text_hash": 3609048564712975615, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/94", - "hash": 17129434987283608290, "orig": "The goal of querying a KG is to answer complex questions. As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/113" } ], + "sref": "#/texts/92", + "subj_hash": 17129434987283608290, "text": "The goal of querying a KG is to answer complex questions. As such, users need to be provided with a functionality to formulate complex queries on the KG and quickly evaluate them.", - "text-hash": 3711217782201102361, + "text_hash": 3711217782201102361, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/95", - "hash": 10350406469077463155, "orig": "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow. Workflows are represented as a DAG of operations and are conceptually related to DFs. Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set. We call these operations worktasks. For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/114" } ], + "sref": "#/texts/93", + "subj_hash": 10350406469077463155, "text": "In order to avoid imposing a complex query language onto users, we have devised a way to define complex graph queries in a declarative format, which we call a workflow. Workflows are represented as a DAG of operations and are conceptually related to DFs. Unlike the former, the nodes of workflow DAGs do not represent data-transformation tasks, but specific graph operations which mutate an input (or intermediate) set of nodes into another set. We call these operations worktasks. For further convenience, we have developed a graphical user interface (UI) which allows to define such workflows in a visual programming approach (see Figure 4).", - "text-hash": 6157696558870441610, + "text_hash": 6157696558870441610, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/96", - "hash": 16949854269270315165, "orig": "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions. In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/115" } ], + "sref": "#/texts/94", + "subj_hash": 16949854269270315165, "text": "Currently, we support four fundamental types of worktasks: node-retrieval, traversal, logical operators and transform functions. In the following sections, we will discuss in detail how the worktasks are implemented in the context of our adjacency matrix design.", - "text-hash": 4111476184068705704, + "text_hash": 4111476184068705704, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/97", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/116" } ], + "sref": "#/texts/95", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/98", - "hash": 4361549266593946746, "orig": "9of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/118" } ], + "sref": "#/texts/96", + "subj_hash": 4361549266593946746, "text": "9of15", - "text-hash": 329104147597527681, + "text_hash": 329104147597527681, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/100", - "hash": 9802652237802670052, "orig": "3.3.1 | Node retrieval", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.7099999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/121" } ], + "sref": "#/texts/97", + "subj_hash": 9802652237802670052, "text": "3.3.1 | Node retrieval", - "text-hash": 6349660887815587103, + "text_hash": 6349660887815587103, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/101", - "hash": 5524728206729419689, "orig": "This task finds a set of nodes which satisfy certain search criteria. This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property. The task constructs a node vector v $^{!}$, such that", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/122" } ], + "sref": "#/texts/98", + "subj_hash": 5524728206729419689, "text": "This task finds a set of nodes which satisfy certain search criteria. This can range from finding a single node by its (approximate) name or exact node identifier, to finding nodes that satisfy a particular property. The task constructs a node vector v $^{!}$, such that", - "text-hash": 10699646946138261716, + "text_hash": 10699646946138261716, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/102", - "hash": 4043385013945968936, "orig": "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/123" } ], + "sref": "#/texts/99", + "subj_hash": 4043385013945968936, "text": "v $^{!}$$_{i}$= 1 if node i \\b S 0 if node i = 2 S , GLYPH \u00f0 3 \u00de", - "text-hash": 588808569772103507, + "text_hash": 588808569772103507, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/103", - "hash": 11778884428660217326, "orig": "where S represents the set of nodes that satisfy the search criteria.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/124" } ], + "sref": "#/texts/100", + "subj_hash": 11778884428660217326, "text": "where S represents the set of nodes that satisfy the search criteria.", - "text-hash": 9277850099981357845, + "text_hash": 9277850099981357845, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/104", - "hash": 12875050310340408203, "orig": "3.3.2 | Graph traversal", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/125" } ], + "sref": "#/texts/101", + "subj_hash": 12875050310340408203, "text": "3.3.2 | Graph traversal", - "text-hash": 10555101842315227314, + "text_hash": 10555101842315227314, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/105", - "hash": 3785875504044487339, "orig": "The simplest type of graph-traversal is the direct graph-traversal. As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$. In more advanced types of graph-traversals, we evaluate all paths of different depth. Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length. For example, consider the case in which we want to explore deeper, indirect paths as follows,", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/126" } ], + "sref": "#/texts/102", + "subj_hash": 3785875504044487339, "text": "The simplest type of graph-traversal is the direct graph-traversal. As explained in detail in section 3.1, these can be implemented as a straightforward SpMV operation w $^{!}$= Av $^{!}$. In more advanced types of graph-traversals, we evaluate all paths of different depth. Since the number of paths connecting two nodes might increase exponentially with the pathlength, one typically reduces the contribution of each path by weighting it with the inverse factorial of the path-length. For example, consider the case in which we want to explore deeper, indirect paths as follows,", - "text-hash": 909351913600217042, + "text_hash": 909351913600217042, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/106", - "hash": 12105626155924658285, "orig": "w $^{!}$= A + A 2 2 ! + A 3 3 ! + GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$\u2212 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/127" } ], + "sref": "#/texts/103", + "subj_hash": 12105626155924658285, "text": "w $^{!}$= A + A 2 2 ! + A 3 3 ! + GLYPH GLYPH GLYPH GLYPH GLYPH v $^{!}$= e$^{A}$- 1 GLYPH GLYPH v $^{!}$: \u00f0 4 \u00de", - "text-hash": 9027673695254677144, + "text_hash": 9027673695254677144, "type": "equation" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/107", - "hash": 16265612055607243129, "orig": "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$. As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/128" } ], + "sref": "#/texts/104", + "subj_hash": 16265612055607243129, "text": "In its most generic case, a graph-traversal can therefore be written down as a matrix-function applied on an edge, that is, w $^{!}$= fA \u00f0 \u00de v $^{!}$. As discussed in detail in previous work, 2 this type of operation can be evaluated extremely efficiently using a recursive Chebyshev polynomial expansion.", - "text-hash": 4579475315408875396, + "text_hash": 4579475315408875396, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/108", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/129" } ], + "sref": "#/texts/105", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/109", - "hash": 10252446451495472512, "orig": "3.3.3 | Logical operations", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.9599999785423279 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/132" } ], + "sref": "#/texts/106", + "subj_hash": 10252446451495472512, "text": "3.3.3 | Logical operations", - "text-hash": 6188098459342469819, + "text_hash": 6188098459342469819, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/110", - "hash": 17011944206067158637, "orig": "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector. There are three common logical operations, AND, OR, and NOT. In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors. In the NOT operation, we inverse the sign for each element of the input vector.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/133" } ], + "sref": "#/texts/107", + "subj_hash": 17011944206067158637, "text": "In logical operations, two sets of nodes are merged into one resulting set, each represented through a node vector. There are three common logical operations, AND, OR, and NOT. In the AND and OR operations, we compute the geometric or the arithmetic mean respectively for each pairwise elements in the vectors. In the NOT operation, we inverse the sign for each element of the input vector.", - "text-hash": 3756558606376352920, + "text_hash": 3756558606376352920, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/111", - "hash": 16289627123982758705, "orig": "3.3.4 | Transform functions", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.4399999976158142 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/134" } ], + "sref": "#/texts/108", + "subj_hash": 16289627123982758705, "text": "3.3.4 | Transform functions", - "text-hash": 4767177430745297228, + "text_hash": 4767177430745297228, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/112", - "hash": 13969801897340997317, "orig": "Lastly, we implement operations which transform the weights associated with nodes. One such operation renormalizes and ultimately ranks the nodes according to their weight.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/135" } ], + "sref": "#/texts/109", + "subj_hash": 13969801897340997317, "text": "Lastly, we implement operations which transform the weights associated with nodes. One such operation renormalizes and ultimately ranks the nodes according to their weight.", - "text-hash": 2263647560089238528, + "text_hash": 2263647560089238528, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/113", - "hash": 105697770555684555, "orig": "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs. Let us now discuss how a workflow is evaluated within the graph engine. Once a workflow has been submitted, each worktask is initially assigned a vector. These vectors are all initialized to zero (v $^{!}$$_{i}$= 0). Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel. This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel. The graph engine then proceeds with the parallel task computations.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/136" } ], + "sref": "#/texts/110", + "subj_hash": 105697770555684555, "text": "With these four types of operations, we can express rich queries to answer complex questions, which can have multiple inputs and outputs. Let us now discuss how a workflow is evaluated within the graph engine. Once a workflow has been submitted, each worktask is initially assigned a vector. These vectors are all initialized to zero (v $^{!}$$_{i}$= 0). Next, the graph will analyze the DAG of worktasks and identify which tasks can be run in parallel. This is achieved by performing a topological sort using depth-first traversal, which yields a list in which each item is a set of tasks that can be executed in parallel. The graph engine then proceeds with the parallel task computations.", - "text-hash": 16051124526605366258, + "text_hash": 16051124526605366258, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/114", - "hash": 15938840672015995359, "orig": "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector. After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights. The higher the weight of the node, the more relevant this node is. As such, we can also retrace which nodes were important in each stage of the workflow.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/137" } ], + "sref": "#/texts/111", + "subj_hash": 15938840672015995359, "text": "For each task, we obtain a set of nodes with corresponding weights by identifying the nonzero elements in the associated node vector. After executing the full workflow, we therefore obtain for each task a list of nodes which can be sorted according to their weights. The higher the weight of the node, the more relevant this node is. As such, we can also retrace which nodes were important in each stage of the workflow.", - "text-hash": 2523894108122369766, + "text_hash": 2523894108122369766, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/115", - "hash": 16505790528099785698, "orig": "4 | CLOUD DESIGN AND DEPLOYMENT", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/138" } ], + "sref": "#/texts/112", + "subj_hash": 16505790528099785698, "text": "4 | CLOUD DESIGN AND DEPLOYMENT", - "text-hash": 4262729847538649369, + "text_hash": 4262729847538649369, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/116", - "hash": 14738723905055920039, "orig": "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes. We package the full platform assets with a Helm chart for quick deployment on multiple setups. For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/139" } ], + "sref": "#/texts/113", + "subj_hash": 14738723905055920039, "text": "The primary deployment target for the CPS is a cloud environment orchestrated via Kubernetes. We package the full platform assets with a Helm chart for quick deployment on multiple setups. For example we can easily deploy the platform on the IBM Cloud or on-premise in an IBM Cloud Private instance, both on x86-and POWER-based nodes.", - "text-hash": 1485721651435830494, + "text_hash": 1485721651435830494, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/117", - "hash": 5699550326698755904, "orig": "In Figure 5, we show the high-level cloud design of the CPS. The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs. We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS). A dashboard allows each project owner to manage the access and the usage of resources. The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster. These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs. Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/140" } ], + "sref": "#/texts/114", + "subj_hash": 5699550326698755904, "text": "In Figure 5, we show the high-level cloud design of the CPS. The platform allows to manage and instrument the corpus processing in a multitenant fashion, that is, it handles multiple knowledge ingestion pipelines and it serves multiple knowledge graphs. We call each unit a Knowledge Graph Space (KGS), which consists of a dedicated instance of the graph engine, a dedicated MongoDB database and a bucket on a cloud object store (COS). A dashboard allows each project owner to manage the access and the usage of resources. The KGS can be launched into multiple flavors to optimally balance the utilization of the cluster. These flavors range from a virtual machine with small amount of memory to a full dedicated node including hardware acceleration with GPUs. Once a KGS is created, it can be paused and rescaled without loss of data or downtime.", - "text-hash": 10750023430231115131, + "text_hash": 10750023430231115131, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/118", - "hash": 11609131422778723150, "orig": "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution. 1 The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend. The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs. These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/141" } ], + "sref": "#/texts/115", + "subj_hash": 11609131422778723150, "text": "For the KG creation pipeline, we implemented an asynchronous compute scheme we already use in our CCS solution. 1 The system is exposed to the user via an API frontend which communicates to the compute workers through a message broker and a result backend. The workers operate on the data, which is hosted on a NoSQL database and a cloud object store for data blobs. These workers are dynamically scaled by the cloud orchestrator to best match the current load of the platform.", - "text-hash": 9163968380151462261, + "text_hash": 9163968380151462261, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/119", - "hash": 788128893109726279, "orig": "The processing of the KG creation typically starts with the user submitting the DF to the frontend API. The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks. The user receives an overall status from the API and is notified when the DF processing has completed.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9100000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/142" } ], + "sref": "#/texts/116", + "subj_hash": 788128893109726279, "text": "The processing of the KG creation typically starts with the user submitting the DF to the frontend API. The DAG of operations is then interpreted as described in the previous section and fine-grained tasks are submitted to the broker, for example, the whole corpus is split in many independent chunks. The user receives an overall status from the API and is notified when the DF processing has completed.", - "text-hash": 15724564631854553726, + "text_hash": 15724564631854553726, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/120", - "hash": 7029344862946908483, "orig": "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS. Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph. If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS. The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/143" } ], + "sref": "#/texts/117", + "subj_hash": 7029344862946908483, "text": "The KG data are distributed between three storage solutions: a NoSQL database, a cloud object storage (COS) and the KGS. Each node is represented as a document in a NoSQL database which contains all the properties attached to the node, for example, the text of a paragraph. If there is a binary object attached to the node, for example, the PDF document or an image, this is stored on the COS. The KGS contains only the minimal information needed to execute the queries, that is, the connectivity of the graph and the properties which are indexed for filtering and search.", - "text-hash": 13806805648097199994, + "text_hash": 13806805648097199994, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/121", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/144" } ], + "sref": "#/texts/118", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/122", - "hash": 2144926686518491811, "orig": "11of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/146" } ], + "sref": "#/texts/119", + "subj_hash": 2144926686518491811, "text": "11of15", - "text-hash": 16380805707549272026, + "text_hash": 16380805707549272026, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/124", - "hash": 18333396269095847693, "orig": "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources. To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally. Most workflow queries execute fast enough such that they can be responded from a synchronous request. Others, especially the graph analytics computations, are more expensive and return large amounts of data. Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/149" } ], + "sref": "#/texts/120", + "subj_hash": 18333396269095847693, "text": "The KGS is exposed to the user via a REST API which is able to aggregate results collected from the different storage sources. To ensure decent performance when serving queries of multiple users, the graph engine can be dynamically scaled horizontally. Most workflow queries execute fast enough such that they can be responded from a synchronous request. Others, especially the graph analytics computations, are more expensive and return large amounts of data. Thus, these queries are executed through an asynchronous API and the results are paginated and streamed back to the user on completion.", - "text-hash": 5024699355629880632, + "text_hash": 5024699355629880632, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/125", - "hash": 4030998538427149966, "orig": "5 | CASE STUDY: OIL AND GAS EXPLORATION", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.7599999904632568 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/150" } ], + "sref": "#/texts/121", + "subj_hash": 4030998538427149966, "text": "5 | CASE STUDY: OIL AND GAS EXPLORATION", - "text-hash": 956984534850296757, + "text_hash": 956984534850296757, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/126", - "hash": 10295608624766759271, "orig": "Oil and gas exploration is a complex, technical field of expertise. Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures. This is a serious impediment for efficient exploration of new oil and gas opportunities. For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles. As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/151" } ], + "sref": "#/texts/122", + "subj_hash": 10295608624766759271, "text": "Oil and gas exploration is a complex, technical field of expertise. Unfortunately, the data of many geological processes and entities is scattered across databases (public and proprietary) and corpora of documents, where it is often deeply embedded in text, tables, and figures. This is a serious impediment for efficient exploration of new oil and gas opportunities. For example, geographic information of geological structures can be found in NaturalEarthData, \u2021\u2021\u2021 while their history, evolution, and components (eg, formations with their age, rock-composition, and depth) are discussed in reports (governmental and proprietary) and scientific articles. As such, experts in oil and gas exploration often need to read many documents in order to find all the information of a certain geographic area and get a good understanding of its underlying geology.", - "text-hash": 6212506812498931614, + "text_hash": 6212506812498931614, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/127", - "hash": 10633780781731536747, "orig": "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites. This is typically done by describing a basin or one of its sub-regions. In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system. 19 In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal. The source is the rock formation in which the oil or gas was created. Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas. In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal. Each one of these components is comprised of one or more formations, with a certain age and rock composition. To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints. For example, the reservoir", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/152" } ], + "sref": "#/texts/123", + "subj_hash": 10633780781731536747, "text": "The main tasks of the experts working in oil and gas exploration are to identify potential new exploration sites. This is typically done by describing a basin or one of its sub-regions. In practice, ' describing a basin ' boils down to identifying all geological formations with their properties in the basin and investigating if these formations constitute a petroleum system. 19 In its most minimalistic form, a petroleum system is defined by three components: source, reservoir, and seal. The source is the rock formation in which the oil or gas was created. Once created, the oil or gas typically migrates to a porous reservoir rock, which holds the oil and gas. In order for the oil and gas not to escape, the reservoir needs to be covered by an impermeable rock formation which is called the seal. Each one of these components is comprised of one or more formations, with a certain age and rock composition. To identify a petroleum system in a certain geographical area, one has to find a candidate formation for each component (ie, reservoir, seal, and source) and observe that the properties of these components satisfy some well-established constraints. For example, the reservoir", - "text-hash": 8189171326047604114, + "text_hash": 8189171326047604114, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/128", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/153" } ], + "sref": "#/texts/124", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/129", - "hash": 1080447728722590413, "orig": "12", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/154" } ], + "sref": "#/texts/125", + "subj_hash": 1080447728722590413, "text": "12", - "text-hash": 15441160910541481976, + "text_hash": 15441160910541481976, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/130", - "hash": 4361549257087816853, "orig": "of 15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/155" } ], + "sref": "#/texts/126", + "subj_hash": 4361549257087816853, "text": "of 15", - "text-hash": 329104161717916080, + "text_hash": 329104161717916080, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/132", - "hash": 10195664788154887804, "orig": "formation has to have a lower depth than the seal formation. Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/159" } ], + "sref": "#/texts/127", + "subj_hash": 10195664788154887804, "text": "formation has to have a lower depth than the seal formation. Another example of such constraints is that the age of the seal and reservoir has to be older than the source.", - "text-hash": 5965659969661688967, + "text_hash": 5965659969661688967, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/133", - "hash": 7538054744015619336, "orig": "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives. On the one hand, it needs to create a consistent Knowledge Graph from a document corpus. This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition). On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/160" } ], + "sref": "#/texts/128", + "subj_hash": 7538054744015619336, "text": "In order for the CPS platform to help the oil and gas explorationalists in their day-to-day job effectively, it needs to meet two objectives. On the one hand, it needs to create a consistent Knowledge Graph from a document corpus. This Knowledge Graph has to contain all geological formations with their respective properties (eg, geographical locations, depth, age, and rock composition). On the other hand, CPS needs to provide fast query responses, such that one can automatically retrieve potential components of petroleum systems and apply the constraints to filter out promising candidates.", - "text-hash": 13307027925001159475, + "text_hash": 13307027925001159475, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/134", - "hash": 12426662601736619109, "orig": "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified. The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG. The key components of this specific pipeline are,", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/161" } ], + "sref": "#/texts/129", + "subj_hash": 12426662601736619109, "text": "During the development and implementation of custom NLU annotators in CPS for oil and gas exploration, the client team worked hand in hand with the IBM Research team to set up a controlled accuracy benchmark in which the key capabilities of the CPS can be quantified. The goal of the benchmark was to test the entire pipeline depicted in Figure 6, that is, from PDF document ingestion to a final, queryable KG. The key components of this specific pipeline are,", - "text-hash": 8341863300316693152, + "text_hash": 8341863300316693152, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/135", - "hash": 4162783521620221579, "orig": "1. the conversion of PDF documents into JSON through CCS,", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.46000000834465027 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/162" } ], + "sref": "#/texts/130", + "subj_hash": 4162783521620221579, "text": "1. the conversion of PDF documents into JSON through CCS,", - "text-hash": 527957687390948274, + "text_hash": 527957687390948274, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/136", - "hash": 5135259059216244866, "orig": "2. the creation of the KG in the CPS from the JSON documents, and", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.7599999904632568 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/163" } ], + "sref": "#/texts/131", + "subj_hash": 5135259059216244866, "text": "2. the creation of the KG in the CPS from the JSON documents, and", - "text-hash": 11300804242294087097, + "text_hash": 11300804242294087097, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/137", - "hash": 16998817296948099535, "orig": "3. the querying of the KG served by CPS to identify petroleum systems elements with their properties.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8700000047683716 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/164" } ], + "sref": "#/texts/132", + "subj_hash": 16998817296948099535, "text": "3. the querying of the KG served by CPS to identify petroleum systems elements with their properties.", - "text-hash": 4121058581451712246, + "text_hash": 4121058581451712246, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/138", - "hash": 1205649569241141618, "orig": "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset. The advantage of using this dataset for an accuracy benchmark is that each report includes two parts. One part is verbose text describing the history, evolution, and composition of the fields. The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline. The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties. Therefore, we ingest these reports into CCS and extract both text and tables. Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/165" } ], + "sref": "#/texts/133", + "subj_hash": 1205649569241141618, "text": "On the suggestion of the experts in the client team, the entire pipeline was run on the 1051 Field Evaluation Reports from the C&C Reservoirs \u00a7\u00a7\u00a7 dataset. The advantage of using this dataset for an accuracy benchmark is that each report includes two parts. One part is verbose text describing the history, evolution, and composition of the fields. The language used is of similar complexity to standard geological publications and thus a realistic challenge for our KG creation pipeline. The second part at the end of each report is comprised of tables which summarize the text and provide us the elements of the petroleum systems with their properties. Therefore, we ingest these reports into CCS and extract both text and tables. Then, by generating a KG only from the text and keeping the tables as ground-truth to compare answers of the KG queries against, we obtain a well-controlled, end-to-end accuracy benchmark.", - "text-hash": 17333577132913364873, + "text_hash": 17333577132913364873, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/139", - "hash": 12257840490666828590, "orig": "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages. This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure. These numbers are in line with those reported in our previous works. 1 Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/166" } ], + "sref": "#/texts/134", + "subj_hash": 12257840490666828590, "text": "For step (1) of the pipeline, we ingested all 1051 PDFs into CCS and visually annotated the document structure on 300 (out of 46 019) pages. This yielded a page model which accurately converted all documents to JSON format with a 99.7% recall and 99.3% precision in the converted structure. These numbers are in line with those reported in our previous works. 1 Importantly, very accurate conversion results are key to the resulting quality, since otherwise the language annotators will process incomplete data and eventually the relevance of query results will suffer.", - "text-hash": 8803415231465414997, + "text_hash": 8803415231465414997, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/140", - "hash": 7040847965650746591, "orig": "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain. Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir). Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs). The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores. Eventually, the KG included 679 296 edges connecting 116 662 nodes.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/167" } ], + "sref": "#/texts/135", + "subj_hash": 7040847965650746591, "text": "In step (2), we create the Knowledge Graph by executing a DF that will generate all the entities and relationships relevant to the geology domain. Our language annotator models trained for geology extract geographic areas, geological structures (eg, basins), formations, ages, rocks, petroleum systems, and their elements (PSE) (eg, seal, source, and reservoir). Overall, we extracted a total of 4597 PSEs, 8811 formations, 471 geological ages, and 64 rock types (relevant to the PSEs). The full processing performed at an average rate of 130 ms per page per worker core, on a system with three worker nodes each using four cores. Eventually, the KG included 679 296 edges connecting 116 662 nodes.", - "text-hash": 13799731378750663142, + "text_hash": 13799731378750663142, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/141", - "hash": 7927601225025519287, "orig": "In step (3), we query the Knowledge Graph using a tailored evaluation workflow. This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/168" } ], + "sref": "#/texts/136", + "subj_hash": 7927601225025519287, "text": "In step (3), we query the Knowledge Graph using a tailored evaluation workflow. This workflow allows us to identify PSEs and their connected properties in the Knowledge Graph, for example, their age, formation and rock", - "text-hash": 13120217128072555470, + "text_hash": 13120217128072555470, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/142", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/169" } ], + "sref": "#/texts/137", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/143", - "hash": 1080447728722590402, "orig": "13", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/171" } ], + "sref": "#/texts/138", + "subj_hash": 1080447728722590402, "text": "13", - "text-hash": 15441160910541481977, + "text_hash": 15441160910541481977, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/144", - "hash": 4361549257087816853, "orig": "of 15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/172" } ], + "sref": "#/texts/139", + "subj_hash": 4361549257087816853, "text": "of 15", - "text-hash": 329104161717916080, + "text_hash": 329104161717916080, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/147", - "hash": 8207961846673301043, "orig": "composition. In Figure 7, we visualize the DAG of this workflow. The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/177" } ], + "sref": "#/texts/140", + "subj_hash": 8207961846673301043, "text": "composition. In Figure 7, we visualize the DAG of this workflow. The final node weights are accumulated throughout the branches on the workflow and represent the relevance score of each node.", - "text-hash": 14933956665806015562, + "text_hash": 14933956665806015562, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/148", - "hash": 11998199584890640594, "orig": "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy. This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents. In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element. One can make two distinct observations. First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision). Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall). Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/178" } ], + "sref": "#/texts/141", + "subj_hash": 11998199584890640594, "text": "To evaluate the correctness of the predicted PSE properties, we follow the standard practice of reporting the top-k accuracy. This is computed as the percentage in which any of the k highest ranked answers matches the expected answer, over all documents. In Table 1, we show the top-1, top-2, top-3, and top-5 accuracy for all properties of each petroleum system element. One can make two distinct observations. First, the top-1 numbers are in the range of 0.75-0.9, meaning that for 3 in 4 cases, the most relevant result predicted by the KG was correct (precision). Secondly, we observe that the top-5 numbers are very high (\u2265 0.97), showing that the system was able detect and aggregate most of the PSEs and their properties (recall). Thus, the recall of the language annotators in the KG creation pipeline was very satisfactory.", - "text-hash": 9121677663017059817, + "text_hash": 9121677663017059817, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/149", - "hash": 16446129547721407877, "orig": "6 | CONCLUSIONS", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/179" } ], + "sref": "#/texts/142", + "subj_hash": 16446129547721407877, "text": "6 | CONCLUSIONS", - "text-hash": 4326952903809379008, + "text_hash": 4326952903809379008, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/150", - "hash": 6720443978031524294, "orig": "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution. CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics. This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/180" } ], + "sref": "#/texts/143", + "subj_hash": 6720443978031524294, "text": "With the introduction of the CPS platform, we demonstrate substantial benefit for domain experts and data scientists in exercising deep exploration of published knowledge in a fully integrated, yet modular cloud solution. CPS seamlessly connects to the CSS, complementing it with a highly scalable, automated pipeline to build consistent domain knowledge models and an intuitive, powerful approach to explorational queries and graph-scale analytics. This is accomplished through three fundamental design considerations: (1) We do not require manual data curation or annotation; (2) We built a scalable, efficient architecture to support the ingestion, processing and query workloads, all embedded in", - "text-hash": 11733208797674542845, + "text_hash": 11733208797674542845, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/151", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/181" } ], + "sref": "#/texts/144", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/152", - "hash": 2144926730621142072, "orig": "14of15", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/182" } ], + "sref": "#/texts/145", + "subj_hash": 2144926730621142072, "text": "14of15", - "text-hash": 16380805732317250115, + "text_hash": 16380805732317250115, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/153", - "hash": 14222671032550229818, "orig": "a single platform; and (3) We expose the capabilities through an intuitively consumable API and complementary UI tools.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6000000238418579 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/184" } ], + "sref": "#/texts/146", + "subj_hash": 14222671032550229818, "text": "a single platform; and (3) We expose the capabilities through an intuitively consumable API and complementary UI tools.", - "text-hash": 1925144237473465665, + "text_hash": 1925144237473465665, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/154", - "hash": 17486770941839589126, "orig": "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team. Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/185" } ], + "sref": "#/texts/147", + "subj_hash": 17486770941839589126, "text": "In our oil and gas case study, we successfully verified our solution for a real-world application with the help of subject matter experts from a client team. Currently, CCS and CPS are actively used in more than five client engagements, most notably in the oil and gas industry as well as in the material science industry.", - "text-hash": 5943448246547541309, + "text_hash": 5943448246547541309, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/155", - "hash": 16574813224778118841, "orig": "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/186" } ], + "sref": "#/texts/148", + "subj_hash": 16574813224778118841, "text": "Future work will focus on processing public repositories such as the arXiv.org library, USPTO, and PubMed in order to make their content available to deep data exploration.", - "text-hash": 4472913868502496196, + "text_hash": 4472913868502496196, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/156", - "hash": 3356142343274371864, "orig": "DATA AVAILABILITY STATEMENT", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/187" } ], + "sref": "#/texts/149", + "subj_hash": 3356142343274371864, "text": "DATA AVAILABILITY STATEMENT", - "text-hash": 17772737780533561635, + "text_hash": 17772737780533561635, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/157", - "hash": 4778022085288441371, "orig": "Data subject to third party restrictions.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.6299999952316284 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/188" } ], + "sref": "#/texts/150", + "subj_hash": 4778022085288441371, "text": "Data subject to third party restrictions.", - "text-hash": 11662592888764396578, + "text_hash": 11662592888764396578, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/158", - "hash": 4361549257598904601, "orig": "ORCID", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8500000238418579 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/189" } ], + "sref": "#/texts/151", + "subj_hash": 4361549257598904601, "text": "ORCID", - "text-hash": 329104162230294308, + "text_hash": 329104162230294308, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/159", - "hash": 3523281823889115814, "orig": "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer https://orcid.org/0000-0001-5761-0422", - "properties": { - "data": [ - [ - "semantic", - "meta-data", - 0.5799999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/190" } ], + "sref": "#/texts/152", + "subj_hash": 3523281823889115814, "text": "Peter W. J. Staar https://orcid.org/0000-0002-8088-0823 Michele Dolfi https://orcid.org/0000-0001-7216-8505 Christoph Auer https://orcid.org/0000-0001-5761-0422", - "text-hash": 1167445296370300893, + "text_hash": 1167445296370300893, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/160", - "hash": 8500729849894221215, "orig": "ENDNOTES", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/191" } ], + "sref": "#/texts/153", + "subj_hash": 8500729849894221215, "text": "ENDNOTES", - "text-hash": 14650266124350583462, + "text_hash": 14650266124350583462, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/161", - "hash": 7813503946963688644, "orig": "* For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9900000095367432 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/192" } ], + "sref": "#/texts/154", + "subj_hash": 7813503946963688644, "text": "* For example, ElasticSearch (https://www.elastic.co) and ApacheLucene (https://lucene.apache.org).", - "text-hash": 12950565807350876671, + "text_hash": 12950565807350876671, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/162", - "hash": 9230987401345399746, "orig": "\u2020 Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions. We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9100000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/193" } ], + "sref": "#/texts/155", + "subj_hash": 9230987401345399746, "text": "\u2020 Most language entities from a technical field are typically represented in a very specific, rigorous way that can be easily captured by regular expressions. We found that in practice, regular expressions often outperform DL models, since we can simply encode these representations.", - "text-hash": 6930355155738437881, + "text_hash": 6930355155738437881, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/163", - "hash": 1997735398126013155, "orig": "\u2021 https://www.nltk.org", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/194" } ], + "sref": "#/texts/156", + "subj_hash": 1997735398126013155, "text": "\u2021 https://www.nltk.org", - "text-hash": 16829787344811603994, + "text_hash": 16829787344811603994, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/164", - "hash": 13566764974477978642, "orig": "\u00a7 We follow the standard JSON-schema for references.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/195" } ], + "sref": "#/texts/157", + "subj_hash": 13566764974477978642, "text": "\u00a7 We follow the standard JSON-schema for references.", - "text-hash": 9498574747519310377, + "text_hash": 9498574747519310377, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/165", - "hash": 4925537010788978399, "orig": "\u00b6 A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1). A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/196" } ], + "sref": "#/texts/158", + "subj_hash": 4925537010788978399, "text": "\u00b6 A rather simple similarity metric is to perform a fuzzy comparison of the names of the newly found entities (ie, the name field found in Listing 1). A more sophisticated approach is to use word embeddings to identify if two concepts are similar.", - "text-hash": 11235784383716113382, + "text_hash": 11235784383716113382, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/166", - "hash": 16552665876195410077, "orig": "** For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/197" } ], + "sref": "#/texts/159", + "subj_hash": 16552665876195410077, "text": "** For example Neo4J, Titan, JanusGraph, Amazon Neptune, and Arangodb.", - "text-hash": 4287966239864749480, + "text_hash": 4287966239864749480, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/167", - "hash": 17579390613842440572, "orig": "\u2020\u2020 This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.800000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/198" } ], + "sref": "#/texts/160", + "subj_hash": 17579390613842440572, "text": "\u2020\u2020 This memory architecture is clearly documented for Titan (http://s3.thinkaurelius.com/docs/titan/current/data-model.html) and Neo4J (http://key-value-stories.blogspot.com/2015/02/neo4j-architecture.html).", - "text-hash": 5855266272999108487, + "text_hash": 5855266272999108487, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/168", - "hash": 722212543953276862, "orig": "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/199" } ], + "sref": "#/texts/161", + "subj_hash": 722212543953276862, "text": "\u2021\u2021 We chose Neo4J as a reference since it is currently the most popular graph database solution, see https://db-engines.com/en/ranking_ trend/graph+dbms", - "text-hash": 15713827668903361733, + "text_hash": 15713827668903361733, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/169", - "hash": 11085577343317113173, "orig": "\u00a7\u00a7 http://graph500.org/", - "properties": { - "data": [ - [ - "semantic", - "header", - 0.8199999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/200" } ], + "sref": "#/texts/162", + "subj_hash": 11085577343317113173, "text": "\u00a7\u00a7 http://graph500.org/", - "text-hash": 7449211522826545008, + "text_hash": 7449211522826545008, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/170", - "hash": 1792096630133661292, "orig": "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6000000238418579 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/201" } ], + "sref": "#/texts/163", + "subj_hash": 1792096630133661292, "text": "\u00b6\u00b6 https://snap.stanford.edu/data/higgs-twitter.html", - "text-hash": 16747146533825186967, + "text_hash": 16747146533825186967, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/171", - "hash": 11462638369524745676, "orig": "*** We assume the weight can be represented by a float value.", - "properties": { - "data": [ - [ - "semantic", - "text", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/202" } ], + "sref": "#/texts/164", + "subj_hash": 11462638369524745676, "text": "*** We assume the weight can be represented by a float value.", - "text-hash": 7288340874592977655, + "text_hash": 7288340874592977655, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/172", - "hash": 16611805225457383637, "orig": "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.4300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/203" } ], + "sref": "#/texts/165", + "subj_hash": 16611805225457383637, "text": "\u2020\u2020\u2020 https://neo4j.com/developer/guide-sizing-and-hardware-calculator/", - "text-hash": 4512570954370983408, + "text_hash": 4512570954370983408, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/173", - "hash": 1531505125666754945, "orig": "\u2021\u2021\u2021 https://www.naturalearthdata.com/", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6600000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/204" } ], + "sref": "#/texts/166", + "subj_hash": 1531505125666754945, "text": "\u2021\u2021\u2021 https://www.naturalearthdata.com/", - "text-hash": 16922240937803157180, + "text_hash": 16922240937803157180, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/174", - "hash": 15684389308320953629, "orig": "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6600000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/205" } ], + "sref": "#/texts/167", + "subj_hash": 15684389308320953629, "text": "\u00a7\u00a7\u00a7 https://www.ccreservoirs.com/", - "text-hash": 2845896203864732456, + "text_hash": 2845896203864732456, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/175", - "hash": 14590754343934702701, "orig": "REFERENCES", - "properties": { - "data": [ - [ - "semantic", - "header", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/206" } ], + "sref": "#/texts/168", + "subj_hash": 14590754343934702701, "text": "REFERENCES", - "text-hash": 1858797456585454232, + "text_hash": 1858797456585454232, "type": "subtitle-level-1" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/176", - "hash": 10480452763767134455, "orig": "1. Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18. New York, NY: ACM; 2018:774-782.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8299999833106995 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/207" } ], + "sref": "#/texts/169", + "subj_hash": 10480452763767134455, "text": "1. Staar Peter WJ, Michele D, Christoph A, Costas B. Corpus conversion service: a machine learning platform to ingest documents at scale. KDD '18. New York, NY: ACM; 2018:774-782.", - "text-hash": 7982224532612302350, + "text_hash": 7982224532612302350, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/177", - "hash": 11866471329779366855, "orig": "2. Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance. Chicago, IL: IEEE; 2016:812-821.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/208" } ], + "sref": "#/texts/170", + "subj_hash": 11866471329779366855, "text": "2. Staar Peter WJ, Kl BP, Roxana I, et al. Stochastic Matrix-Function Estimators: Scalable Big-Data Kernels with High Performance. Chicago, IL: IEEE; 2016:812-821.", - "text-hash": 8969674542364551422, + "text_hash": 8969674542364551422, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/178", - "hash": 6016885898370676469, "orig": "3. Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries. ArXiv.abs/1907.08400; 2019.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9200000166893005 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/209" } ], + "sref": "#/texts/171", + "subj_hash": 6016885898370676469, "text": "3. Matteo M, Christoph A, Val'ery W, et al. An information extraction and knowledge graph platform for accelerating biochemical discoveries. ArXiv.abs/1907.08400; 2019.", - "text-hash": 12797055744904705040, + "text_hash": 12797055744904705040, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/179", - "hash": 13946275785662847920, "orig": "4. Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10. https://doi. org/10.2118/197610-MS.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8199999928474426 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/210" } ], + "sref": "#/texts/172", + "subj_hash": 13946275785662847920, "text": "4. Paolo R, Marco P, Floriana B, Peter S, Costas B. Application of Geocognitive Technologies to Basin & Petroleum System Analyses, Texas: Society of Petroleum Engineers; 2019). Abu Dhabi International Petroleum Exhibition & Conference, Abu Dhabi, UAE, :10. https://doi. org/10.2118/197610-MS.", - "text-hash": 2278118371277588683, + "text_hash": 2278118371277588683, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/180", - "hash": 7693798302433367973, "orig": "5. Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9300000071525574 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/211" } ], + "sref": "#/texts/173", + "subj_hash": 7693798302433367973, "text": "5. Guillaume L, Miguel B, Sandeep S, Kazuya K, Chris D. Neural Architectures for Named Entity Recognition, Stroudsburg PA: Association for Computational Linguistics; 2016.", - "text-hash": 13426003943449777376, + "text_hash": 13426003943449777376, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/181", - "hash": 3109792572574236398, "orig": "6. Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs. TACL. 2016;4:357-370.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/212" } ], + "sref": "#/texts/174", + "subj_hash": 3109792572574236398, "text": "6. Chiu Jason PC, Eric N. Named entity recognition with bidirectional LSTM-CNNs. TACL. 2016;4:357-370.", - "text-hash": 17942512882695875605, + "text_hash": 17942512882695875605, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/182", - "hash": 8111170387462350170, "orig": "7. Matthew H, Ines M. spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9200000166893005 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/213" } ], + "sref": "#/texts/175", + "subj_hash": 8111170387462350170, "text": "7. Matthew H, Ines M. spaCy 2: natural language understanding with bloom embeddings, convolutional neural networks and incremental parsing. To appear. 2017.", - "text-hash": 15035325662489879393, + "text_hash": 15035325662489879393, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/183", - "hash": 14682702346227170925, "orig": "8. Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada. Am Assoc Pet Geol Bull. 2005;89(9):1203-1237.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8600000143051147 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/214" } ], + "sref": "#/texts/176", + "subj_hash": 14682702346227170925, "text": "8. Magoon LB, Hudson TL, Peters KE. Egret-Hibernia(!), a significant petroleum system, northern Grand Banks area, offshore eastern Canada. Am Assoc Pet Geol Bull. 2005;89(9):1203-1237.", - "text-hash": 1825488956803771544, + "text_hash": 1825488956803771544, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/184", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/215" } ], + "sref": "#/texts/177", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/185", - "hash": 11430385775112165283, "orig": "9. Estrada E. Subgraph centrality in complex networks. Phys Rev E. 2005;71(5):056103.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 1.0 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/218" } ], + "sref": "#/texts/178", + "subj_hash": 11430385775112165283, "text": "9. Estrada E. Subgraph centrality in complex networks. Phys Rev E. 2005;71(5):056103.", - "text-hash": 7383629567386653914, + "text_hash": 7383629567386653914, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/186", - "hash": 5825495964576843004, "orig": "10. Estrada Ernesto, Higham Desmond J. (2010). Network Properties Revealed through Matrix Functions. SIAM Review, 52, (4), 696-714. http://dx.doi.org/10.1137/090761070.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.699999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/219" } ], + "sref": "#/texts/179", + "subj_hash": 5825495964576843004, "text": "10. Estrada Ernesto, Higham Desmond J. (2010). Network Properties Revealed through Matrix Functions. SIAM Review, 52, (4), 696-714. http://dx.doi.org/10.1137/090761070.", - "text-hash": 12713726337853489671, + "text_hash": 12713726337853489671, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/187", - "hash": 5698421097735371040, "orig": "11. Labs Redis. Benchmarking RedisGraph 1.0. 2019.", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.5899999737739563 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/220" } ], + "sref": "#/texts/180", + "subj_hash": 5698421097735371040, "text": "11. Labs Redis. Benchmarking RedisGraph 1.0. 2019.", - "text-hash": 10746649133789046619, + "text_hash": 10746649133789046619, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/188", - "hash": 5870535063942256428, "orig": "12. TigerGraph. Real-Time Deep Link Analytics. 2018.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.550000011920929 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/221" } ], + "sref": "#/texts/181", + "subj_hash": 5870535063942256428, "text": "12. TigerGraph. Real-Time Deep Link Analytics. 2018.", - "text-hash": 12596629408176592215, + "text_hash": 12596629408176592215, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/189", - "hash": 18196767266655606709, "orig": "13. Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra. Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.949999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/222" } ], + "sref": "#/texts/182", + "subj_hash": 18196767266655606709, "text": "13. Jeremy K, John G. Graph Algorithms in the Language of Linear Algebra. Philadelphia, PA: Society for Industrial and Applied Mathematics; 2011.", - "text-hash": 4940703957630358736, + "text_hash": 4940703957630358736, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/190", - "hash": 3623403683642367845, "orig": "14. Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015). Graphs, Matrices, and the GraphBLAS: Seven Good Reasons. Procedia Computer Science, 51, 2453-2462. http://dx.doi.org/10.1016/j.procs.2015.05.353.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.7799999713897705 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/223" } ], + "sref": "#/texts/183", + "subj_hash": 3623403683642367845, "text": "14. Kepner Jeremy, Bader David, Bulu\u00e7 Ayd \u0131 n, Gilbert John, Mattson Timothy, Meyerhenke Henning (2015). Graphs, Matrices, and the GraphBLAS: Seven Good Reasons. Procedia Computer Science, 51, 2453-2462. http://dx.doi.org/10.1016/j.procs.2015.05.353.", - "text-hash": 1288017376570396064, + "text_hash": 1288017376570396064, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/191", - "hash": 13936866850854297069, "orig": "15. Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications. Int J High Perform Comput Appl. 2011;25 (4):496-509.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9700000286102295 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/224" } ], + "sref": "#/texts/184", + "subj_hash": 13936866850854297069, "text": "15. Aydin B, Gilbert John R. The combinatorial BLAS: design, implementation, and applications. Int J High Perform Comput Appl. 2011;25 (4):496-509.", - "text-hash": 2215522210708998936, + "text_hash": 2215522210708998936, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/192", - "hash": 8497015665124263236, "orig": "16. Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC. 2016; 1-9.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.9800000190734863 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/225" } ], + "sref": "#/texts/185", + "subj_hash": 8497015665124263236, "text": "16. Jeremy K, Peter A, Bader David A, et al. Mathematical foundations of the GraphBLAS. 2016 IEEE HPEC. 2016; 1-9.", - "text-hash": 14644960259055240063, + "text_hash": 14644960259055240063, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/193", - "hash": 15947529491299956047, "orig": "17. Ariful A, Mathias J, Aydin B, Ng Esmond G. The reverse Cuthill-McKee algorithm in distributed-memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.7900000214576721 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/226" } ], + "sref": "#/texts/186", + "subj_hash": 15947529491299956047, "text": "17. Ariful A, Mathias J, Aydin B, Ng Esmond G. The reverse Cuthill-McKee algorithm in distributed-memory. 2017 IEEE International Parallel and Distributed Processing Symposium (IPDPS). 2017: 22-31.", - "text-hash": 2515131343544103798, + "text_hash": 2515131343544103798, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/194", - "hash": 14843401725435831033, "orig": "18. Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic Conference. 2005 1-7.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.6600000262260437 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/227" } ], + "sref": "#/texts/187", + "subj_hash": 14843401725435831033, "text": "18. Rukhsana S, Anila U, Chughtai IR. Review of storage techniques for sparse matrices. 2005 Pakistan Section Multitopic Conference. 2005 1-7.", - "text-hash": 1389998498969001988, + "text_hash": 1389998498969001988, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/195", - "hash": 16676439669743530711, "orig": "19. Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.8899999856948853 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/228" } ], + "sref": "#/texts/188", + "subj_hash": 16676439669743530711, "text": "19. Welte DH, Horsfield B, Baker DR. Petroleum and Basin Evolution: Insights from Petroleum Geochemistry, Geology, and Basin Modeling, Berlin Heidelberg: Springer-Verlag; 1997.", - "text-hash": 4375808543141490670, + "text_hash": 4375808543141490670, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/196", - "hash": 2986547206451163051, "orig": "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora. Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "properties": { - "data": [ - [ - "semantic", - "reference", - 0.699999988079071 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/229" } ], + "sref": "#/texts/189", + "subj_hash": 2986547206451163051, "text": "How to cite this article: Staar PWJ, Dolfi M, Auer C. Corpus processing service: A Knowledge Graph platform to perform deep data exploration on corpora. Applied AI Letters. 2020;1:e20. https://doi.org/10.1002/ail2.20", - "text-hash": 17781974298360978642, + "text_hash": 17781974298360978642, "type": "paragraph" }, { "dloc": "457bcbb2d189b4719daa30d94d946d913f1a6bddaabd1c12793b143a30e1115d#/texts/197", - "hash": 18391264192891079539, "orig": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "properties": { - "data": [ - [ - "semantic", - "text", - 0.8999999761581421 - ] - ], - "headers": [ - "type", - "label", - "confidence" - ] - }, "prov": [ { "$ref": "#/page-elements/230" } ], + "sref": "#/texts/190", + "subj_hash": 18391264192891079539, "text": "26895595, 2020, 2, Downloaded from https://onlinelibrary.wiley.com/doi/10.1002/ail2.20, Wiley Online Library on [23/08/2023]. See the Terms and Conditions (https://onlinelibrary.wiley.com/terms-and-conditions) on Wiley Online Library for rules of use; OA articles are governed by the applicable Creative Commons License", - "text-hash": 4975885909619128714, + "text_hash": 4975885909619128714, "type": "paragraph" } ] diff --git a/tests/data/glm/test_01A/glm_ref/topology.json b/tests/data/glm/test_01A/glm_ref/topology.json index c05dd0db..c5a11c47 100644 --- a/tests/data/glm/test_01A/glm_ref/topology.json +++ b/tests/data/glm/test_01A/glm_ref/topology.json @@ -29,12 +29,12 @@ [ -1, "prev", - 8990 + 9638 ], [ 1, "next", - 9060 + 9706 ], [ 2, @@ -64,12 +64,12 @@ [ 32, "tax-dn", - 1119 + 1111 ], [ 33, "tax-up", - 1690 + 1721 ], [ 64, @@ -84,92 +84,92 @@ [ 66, "to-singular", - 304 + 314 ], [ 67, "to-plural", - 304 + 314 ], [ 96, "to-token", - 1116 + 1073 ], [ 97, "from-token", - 1116 + 1073 ], [ 98, "to-pos", - 1994 + 1882 ], [ 99, "from-pos", - 1994 + 1882 ], [ 100, "to-label", - 1432 + 1572 ], [ 101, "from-label", - 1432 + 1572 ], [ 102, "to-root", - 1072 + 1127 ], [ 103, "from-root", - 1057 + 1094 ], [ 128, "to-sent", - 2135 + 2409 ], [ 129, "from-sent", - 2135 + 2409 ], [ 130, "to-text", - 1323 + 1502 ], [ 131, "from-text", - 1323 + 1502 ], [ 132, "to-table", - 5 + 0 ], [ 133, "from-table", - 5 + 0 ], [ 134, "to-doc", - 877 + 937 ], [ 135, "from-doc", - 877 + 937 ], [ 256, @@ -860,49 +860,49 @@ -1, "prev", 1, - 6625 + 7159 ], [ -1, "prev", 2, - 1382 + 1387 ], [ -1, "prev", 4, - 491 + 532 ], [ -1, "prev", 8, - 233 + 243 ], [ -1, "prev", 16, - 130 + 149 ], [ -1, "prev", 32, - 71 + 97 ], [ -1, "prev", 64, - 31 + 40 ], [ -1, "prev", 128, - 19 + 23 ], [ -1, @@ -914,13 +914,13 @@ -1, "prev", 512, - 2 + 3 ], [ -1, "prev", 1024, - 1 + 0 ], [ -1, @@ -992,49 +992,49 @@ 1, "next", 1, - 6673 + 7199 ], [ 1, "next", 2, - 1389 + 1395 ], [ 1, "next", 4, - 496 + 540 ], [ 1, "next", 8, - 236 + 248 ], [ 1, "next", 16, - 135 + 153 ], [ 1, "next", 32, - 72 + 100 ], [ 1, "next", 64, - 32 + 40 ], [ 1, "next", 128, - 19 + 23 ], [ 1, @@ -1046,13 +1046,13 @@ 1, "next", 512, - 2 + 3 ], [ 1, "next", 1024, - 1 + 0 ], [ 1, @@ -1778,37 +1778,37 @@ 32, "tax-dn", 0, - 164 + 172 ], [ 32, "tax-dn", 1, - 299 + 269 ], [ 32, "tax-dn", 2, - 547 + 559 ], [ 32, "tax-dn", 4, - 77 + 75 ], [ 32, "tax-dn", 8, - 22 + 27 ], [ 32, "tax-dn", 16, - 10 + 9 ], [ 32, @@ -1910,49 +1910,49 @@ 33, "tax-up", 0, - 164 + 172 ], [ 33, "tax-up", 1, - 637 + 622 ], [ 33, "tax-up", 2, - 652 + 663 ], [ 33, "tax-up", 4, - 128 + 139 ], [ 33, "tax-up", 8, - 65 + 76 ], [ 33, "tax-up", 16, - 38 + 36 ], [ 33, "tax-up", 32, - 4 + 10 ], [ 33, "tax-up", 64, - 2 + 3 ], [ 33, @@ -2312,7 +2312,7 @@ 66, "to-singular", 1, - 304 + 314 ], [ 66, @@ -2444,7 +2444,7 @@ 67, "to-plural", 1, - 304 + 314 ], [ 67, @@ -2576,19 +2576,19 @@ 96, "to-token", 1, - 855 + 807 ], [ 96, "to-token", 2, - 108 + 110 ], [ 96, "to-token", 4, - 91 + 86 ], [ 96, @@ -2600,7 +2600,7 @@ 96, "to-token", 16, - 16 + 23 ], [ 96, @@ -2612,7 +2612,7 @@ 96, "to-token", 64, - 2 + 3 ], [ 96, @@ -2708,19 +2708,19 @@ 97, "from-token", 1, - 855 + 807 ], [ 97, "from-token", 2, - 108 + 110 ], [ 97, "from-token", 4, - 91 + 86 ], [ 97, @@ -2732,7 +2732,7 @@ 97, "from-token", 16, - 16 + 23 ], [ 97, @@ -2744,7 +2744,7 @@ 97, "from-token", 64, - 2 + 3 ], [ 97, @@ -2840,55 +2840,55 @@ 98, "to-pos", 1, - 1196 + 1090 ], [ 98, "to-pos", 2, - 332 + 300 ], [ 98, "to-pos", 4, - 224 + 229 ], [ 98, "to-pos", 8, - 114 + 120 ], [ 98, "to-pos", 16, - 69 + 73 ], [ 98, "to-pos", 32, - 32 + 40 ], [ 98, "to-pos", 64, - 16 + 18 ], [ 98, "to-pos", 128, - 5 + 3 ], [ 98, "to-pos", 256, - 3 + 6 ], [ 98, @@ -2972,55 +2972,55 @@ 99, "from-pos", 1, - 1196 + 1090 ], [ 99, "from-pos", 2, - 332 + 300 ], [ 99, "from-pos", 4, - 224 + 229 ], [ 99, "from-pos", 8, - 114 + 120 ], [ 99, "from-pos", 16, - 69 + 73 ], [ 99, "from-pos", 32, - 32 + 40 ], [ 99, "from-pos", 64, - 16 + 18 ], [ 99, "from-pos", 128, - 5 + 3 ], [ 99, "from-pos", 256, - 3 + 6 ], [ 99, @@ -3104,43 +3104,43 @@ 100, "to-label", 1, - 890 + 964 ], [ 100, "to-label", 2, - 237 + 248 ], [ 100, "to-label", 4, - 139 + 167 ], [ 100, "to-label", 8, - 92 + 105 ], [ 100, "to-label", 16, - 58 + 60 ], [ 100, "to-label", 32, - 8 + 19 ], [ 100, "to-label", 64, - 7 + 8 ], [ 100, @@ -3236,43 +3236,43 @@ 101, "from-label", 1, - 890 + 964 ], [ 101, "from-label", 2, - 237 + 248 ], [ 101, "from-label", 4, - 139 + 167 ], [ 101, "from-label", 8, - 92 + 105 ], [ 101, "from-label", 16, - 58 + 60 ], [ 101, "from-label", 32, - 8 + 19 ], [ 101, "from-label", 64, - 7 + 8 ], [ 101, @@ -3362,25 +3362,25 @@ 102, "to-root", 0, - 167 + 175 ], [ 102, "to-root", 1, - 771 + 809 ], [ 102, "to-root", 2, - 105 + 104 ], [ 102, "to-root", 4, - 17 + 27 ], [ 102, @@ -3494,37 +3494,37 @@ 103, "from-root", 0, - 105 + 106 ], [ 103, "from-root", 1, - 510 + 517 ], [ 103, "from-root", 2, - 339 + 346 ], [ 103, "from-root", 4, - 69 + 87 ], [ 103, "from-root", 8, - 24 + 26 ], [ 103, "from-root", 16, - 10 + 12 ], [ 103, @@ -3632,19 +3632,19 @@ 128, "to-sent", 1, - 2089 + 2350 ], [ 128, "to-sent", 2, - 43 + 55 ], [ 128, "to-sent", 4, - 3 + 4 ], [ 128, @@ -3764,19 +3764,19 @@ 129, "from-sent", 1, - 2089 + 2350 ], [ 129, "from-sent", 2, - 43 + 55 ], [ 129, "from-sent", 4, - 3 + 4 ], [ 129, @@ -3896,19 +3896,19 @@ 130, "to-text", 1, - 1220 + 1365 ], [ 130, "to-text", 2, - 66 + 91 ], [ 130, "to-text", 4, - 33 + 42 ], [ 130, @@ -4028,19 +4028,19 @@ 131, "from-text", 1, - 1220 + 1365 ], [ 131, "from-text", 2, - 66 + 91 ], [ 131, "from-text", 4, - 33 + 42 ], [ 131, @@ -4160,13 +4160,13 @@ 132, "to-table", 1, - 1 + 0 ], [ 132, "to-table", 2, - 3 + 0 ], [ 132, @@ -4178,7 +4178,7 @@ 132, "to-table", 8, - 1 + 0 ], [ 132, @@ -4292,13 +4292,13 @@ 133, "from-table", 1, - 1 + 0 ], [ 133, "from-table", 2, - 3 + 0 ], [ 133, @@ -4310,7 +4310,7 @@ 133, "from-table", 8, - 1 + 0 ], [ 133, @@ -4424,31 +4424,31 @@ 134, "to-doc", 1, - 681 + 710 ], [ 134, "to-doc", 2, - 92 + 100 ], [ 134, "to-doc", 4, - 58 + 70 ], [ 134, "to-doc", 8, - 31 + 33 ], [ 134, "to-doc", 16, - 11 + 19 ], [ 134, @@ -4460,7 +4460,7 @@ 134, "to-doc", 64, - 1 + 2 ], [ 134, @@ -4556,31 +4556,31 @@ 135, "from-doc", 1, - 681 + 710 ], [ 135, "from-doc", 2, - 92 + 100 ], [ 135, "from-doc", 4, - 58 + 70 ], [ 135, "from-doc", 8, - 31 + 33 ], [ 135, "from-doc", 16, - 11 + 19 ], [ 135, @@ -4592,7 +4592,7 @@ 135, "from-doc", 64, - 1 + 2 ], [ 135, @@ -5107,12 +5107,12 @@ [ 0, "token", - 1690 + 1671 ], [ 1, "syntax", - 36 + 37 ], [ 2, @@ -5122,32 +5122,32 @@ [ 8, "cont", - 55 + 0 ], [ 9, "conn", - 179 + 187 ], [ 10, "term", - 985 + 1045 ], [ 11, "verb", - 447 + 479 ], [ 16, "sentence", - 300 + 361 ], [ 32, "text", - 178 + 188 ], [ 48, @@ -5251,7 +5251,7 @@ 0, "token", 0, - 60 + 17 ], [ 0, @@ -5269,25 +5269,25 @@ 0, "token", 4, - 204 + 202 ], [ 0, "token", 8, - 108 + 122 ], [ 0, "token", 16, - 69 + 74 ], [ 0, "token", 32, - 30 + 36 ], [ 0, @@ -5305,7 +5305,7 @@ 0, "token", 256, - 3 + 4 ], [ 0, @@ -5389,7 +5389,7 @@ 1, "syntax", 1, - 0 + 1 ], [ 1, @@ -5401,19 +5401,19 @@ 1, "syntax", 4, - 4 + 2 ], [ 1, "syntax", 8, - 2 + 1 ], [ 1, "syntax", 16, - 5 + 7 ], [ 1, @@ -5431,19 +5431,19 @@ 1, "syntax", 128, - 7 + 4 ], [ 1, "syntax", 256, - 10 + 11 ], [ 1, "syntax", 512, - 2 + 5 ], [ 1, @@ -5647,25 +5647,25 @@ 8, "cont", 0, - 4 + 0 ], [ 8, "cont", 1, - 41 + 0 ], [ 8, "cont", 2, - 5 + 0 ], [ 8, "cont", 4, - 4 + 0 ], [ 8, @@ -5677,7 +5677,7 @@ 8, "cont", 16, - 1 + 0 ], [ 8, @@ -5785,31 +5785,31 @@ 9, "conn", 1, - 105 + 106 ], [ 9, "conn", 2, - 17 + 24 ], [ 9, "conn", 4, - 22 + 20 ], [ 9, "conn", 8, - 17 + 16 ], [ 9, "conn", 16, - 8 + 10 ], [ 9, @@ -5821,7 +5821,7 @@ 9, "conn", 64, - 2 + 3 ], [ 9, @@ -5911,49 +5911,49 @@ 10, "term", 0, - 110 + 108 ], [ 10, "term", 1, - 684 + 716 ], [ 10, "term", 2, - 92 + 99 ], [ 10, "term", 4, - 55 + 66 ], [ 10, "term", 8, - 30 + 35 ], [ 10, "term", 16, - 11 + 18 ], [ 10, "term", 32, - 2 + 1 ], [ 10, "term", 64, - 1 + 2 ], [ 10, @@ -6049,31 +6049,31 @@ 11, "verb", 1, - 352 + 376 ], [ 11, "verb", 2, - 54 + 56 ], [ 11, "verb", 4, - 24 + 31 ], [ 11, "verb", 8, - 13 + 11 ], [ 11, "verb", 16, - 2 + 3 ], [ 11, @@ -6181,13 +6181,13 @@ 16, "sentence", 1, - 298 + 357 ], [ 16, "sentence", 2, - 2 + 3 ], [ 16, @@ -6205,7 +6205,7 @@ 16, "sentence", 16, - 0 + 1 ], [ 16, @@ -6313,7 +6313,7 @@ 32, "text", 1, - 67 + 77 ], [ 32, @@ -7109,7 +7109,7 @@ 0, "token", 0, - 41 + 4 ], [ 0, @@ -7121,37 +7121,37 @@ 0, "token", 2, - 971 + 961 ], [ 0, "token", 4, - 384 + 385 ], [ 0, "token", 8, - 166 + 180 ], [ 0, "token", 16, - 81 + 89 ], [ 0, "token", 32, - 28 + 32 ], [ 0, "token", 64, - 14 + 15 ], [ 0, @@ -7253,7 +7253,7 @@ 1, "syntax", 2, - 0 + 1 ], [ 1, @@ -7265,7 +7265,7 @@ 1, "syntax", 8, - 2 + 1 ], [ 1, @@ -7277,19 +7277,19 @@ 1, "syntax", 32, - 4 + 2 ], [ 1, "syntax", 64, - 10 + 11 ], [ 1, "syntax", 128, - 10 + 12 ], [ 1, @@ -7517,13 +7517,13 @@ 8, "cont", 2, - 47 + 0 ], [ 8, "cont", 4, - 7 + 0 ], [ 8, @@ -7535,7 +7535,7 @@ 8, "cont", 16, - 1 + 0 ], [ 8, @@ -7649,19 +7649,19 @@ 9, "conn", 2, - 106 + 107 ], [ 9, "conn", 4, - 31 + 36 ], [ 9, "conn", 8, - 22 + 23 ], [ 9, @@ -7679,7 +7679,7 @@ 9, "conn", 64, - 3 + 4 ], [ 9, @@ -7781,25 +7781,25 @@ 10, "term", 2, - 705 + 740 ], [ 10, "term", 4, - 120 + 131 ], [ 10, "term", 8, - 39 + 47 ], [ 10, "term", 16, - 11 + 17 ], [ 10, @@ -7913,13 +7913,13 @@ 11, "verb", 2, - 358 + 381 ], [ 11, "verb", 4, - 67 + 76 ], [ 11, @@ -7931,13 +7931,13 @@ 11, "verb", 16, - 4 + 3 ], [ 11, "verb", 32, - 1 + 2 ], [ 11, @@ -8045,13 +8045,13 @@ 16, "sentence", 2, - 298 + 357 ], [ 16, "sentence", 4, - 2 + 3 ], [ 16, @@ -8069,7 +8069,7 @@ 16, "sentence", 32, - 0 + 1 ], [ 16, @@ -8177,7 +8177,7 @@ 32, "text", 2, - 67 + 77 ], [ 32, @@ -8967,49 +8967,49 @@ 0, "token", 0, - 41 + 4 ], [ 0, "token", 1, - 920 + 915 ], [ 0, "token", 2, - 266 + 265 ], [ 0, "token", 4, - 215 + 216 ], [ 0, "token", 8, - 114 + 121 ], [ 0, "token", 16, - 70 + 77 ], [ 0, "token", 32, - 36 + 42 ], [ 0, "token", 64, - 17 + 18 ], [ 0, @@ -9021,7 +9021,7 @@ 0, "token", 256, - 4 + 6 ], [ 0, @@ -9117,43 +9117,43 @@ 1, "syntax", 4, - 3 + 2 ], [ 1, "syntax", 8, - 3 + 2 ], [ 1, "syntax", 16, - 5 + 6 ], [ 1, "syntax", 32, - 0 + 2 ], [ 1, "syntax", 64, - 4 + 2 ], [ 1, "syntax", 128, - 3 + 2 ], [ 1, "syntax", 256, - 7 + 11 ], [ 1, @@ -9165,13 +9165,13 @@ 1, "syntax", 1024, - 5 + 3 ], [ 1, "syntax", 2048, - 0 + 1 ], [ 1, @@ -9369,19 +9369,19 @@ 8, "cont", 1, - 43 + 0 ], [ 8, "cont", 2, - 4 + 0 ], [ 8, "cont", 4, - 7 + 0 ], [ 8, @@ -9393,7 +9393,7 @@ 8, "cont", 16, - 1 + 0 ], [ 8, @@ -9501,37 +9501,37 @@ 9, "conn", 1, - 105 + 106 ], [ 9, "conn", 2, - 17 + 24 ], [ 9, "conn", 4, - 21 + 20 ], [ 9, "conn", 8, - 17 + 15 ], [ 9, "conn", 16, - 9 + 10 ], [ 9, "conn", 32, - 5 + 7 ], [ 9, @@ -9543,13 +9543,13 @@ 9, "conn", 128, - 1 + 0 ], [ 9, "conn", 256, - 1 + 2 ], [ 9, @@ -9633,31 +9633,31 @@ 10, "term", 1, - 681 + 710 ], [ 10, "term", 2, - 92 + 100 ], [ 10, "term", 4, - 58 + 70 ], [ 10, "term", 8, - 31 + 33 ], [ 10, "term", 16, - 11 + 19 ], [ 10, @@ -9669,7 +9669,7 @@ 10, "term", 64, - 1 + 2 ], [ 10, @@ -9765,31 +9765,31 @@ 11, "verb", 1, - 352 + 376 ], [ 11, "verb", 2, - 52 + 55 ], [ 11, "verb", 4, - 25 + 31 ], [ 11, "verb", 8, - 13 + 11 ], [ 11, "verb", 16, - 3 + 4 ], [ 11, @@ -9897,13 +9897,13 @@ 16, "sentence", 1, - 298 + 357 ], [ 16, "sentence", 2, - 2 + 3 ], [ 16, @@ -9921,7 +9921,7 @@ 16, "sentence", 16, - 0 + 1 ], [ 16, @@ -10029,7 +10029,7 @@ 32, "text", 1, - 67 + 77 ], [ 32, diff --git a/tests/data/texts/references.nlp.jsonl b/tests/data/texts/references.nlp.jsonl index e43011fb..811ecf4d 100644 --- a/tests/data/texts/references.nlp.jsonl +++ b/tests/data/texts/references.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["reference", "citation-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3089690646178643593, 8143668872857370346, 18446744073709551615, 18446744073709551615, 4, 16, 4, 16, 3, 6, true, "J. Nagamatsu", "J. Nagamatsu"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 4958313984110456424, 12758216704979571657, 18446744073709551615, 18446744073709551615, 18, 29, 18, 29, 7, 10, true, "N. Nakagawa", "N. Nakagawa"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 11365016642846088308, 8423174012903247560, 18446744073709551615, 18446744073709551615, 31, 42, 31, 42, 11, 14, true, "T. Muranaka", "T. Muranaka"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 3078907282781552519, 13717640772378057531, 18446744073709551615, 18446744073709551615, 44, 55, 44, 55, 15, 18, true, "Y. Zenitani", "Y. Zenitani"], ["reference", "author", 14523797031010145779, "TEXT", "#", 1.0, 1988581422311921121, 4091673062415006471, 18446744073709551615, 18446744073709551615, 61, 72, 61, 72, 20, 23, true, "J. Akimitsu", "J. Akimitsu"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164561, 18446744073709551615, 18446744073709551615, 74, 120, 74, 120, 24, 31, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, 18446744073709551615, 18446744073709551615, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "volume", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["reference", "pages", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.8899999856948853]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["reference", "citation-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14650265762971425816, 3097372269338040450, 18446744073709551615, 18446744073709551615, 4, 12, 4, 12, 3, 6, true, "E. Bauer", "E. Bauer"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 12745877337770851176, 17492495346968875636, 18446744073709551615, 18446744073709551615, 14, 25, 14, 25, 7, 10, true, "G. Hilscher", "G. Hilscher"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 6559161264042875188, 15358198509906445555, 18446744073709551615, 18446744073709551615, 27, 36, 27, 36, 11, 14, true, "H. Michor", "H. Michor"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350732852553197, 2495209709904939132, 18446744073709551615, 18446744073709551615, 38, 45, 38, 45, 15, 18, true, "C. Paul", "C. Paul"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 1872309045500499681, 10241615369928072261, 18446744073709551615, 18446744073709551615, 47, 60, 47, 60, 19, 24, true, "E. W. Scheidt", "E. W. Scheidt"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 5422770472651955982, 11772559073191013545, 18446744073709551615, 18446744073709551615, 62, 73, 62, 73, 25, 28, true, "A. Gribanov", "A. Gribanov"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 14822239363118939802, 17124912415138671071, 18446744073709551615, 18446744073709551615, 75, 87, 75, 87, 29, 32, true, "Y. Seropegin", "Y. Seropegin"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106350362458218625, 16869276978878653097, 18446744073709551615, 18446744073709551615, 89, 97, 89, 96, 33, 36, true, "H. No\u00ebl", "H. No\u00ebl"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 9282842575735043676, 14453998140420302199, 18446744073709551615, 18446744073709551615, 99, 109, 98, 108, 37, 40, true, "M. Sigrist", "M. Sigrist"], ["reference", "author", 4183773491823524238, "TEXT", "#", 1.0, 8106352579825635529, 1766684285595822750, 18446744073709551615, 18446744073709551615, 115, 122, 114, 121, 42, 45, true, "P. Rogl", "P. Rogl"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518782, 18446744073709551615, 18446744073709551615, 124, 204, 123, 203, 46, 57, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912422, 18446744073709551615, 18446744073709551615, 206, 221, 205, 220, 58, 63, true, "Phys. Rev. Lett", "Phys. Rev. Lett"], ["reference", "volume", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["reference", "pages", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "reference-number", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576118, null, null, 0, 3, 0, 3, 0, 3, true, "1", "[1]"], ["reference", "authors", 14523797031010145779, "TEXT", "#", 1.0, 13167782075772401771, 9399779311929866553, null, null, 4, 73, 4, 73, 3, 24, true, "J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu", "J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu,"], ["reference", "title", 14523797031010145779, "TEXT", "#", 1.0, 5664222832544310573, 8851660666775164566, null, null, 74, 121, 74, 121, 24, 32, true, "Superconductivity at 39K in magnesium diboride", "Superconductivity at 39K in magnesium diboride,"], ["reference", "journal", 14523797031010145779, "TEXT", "#", 1.0, 16381206556987855680, 17301021513739771795, null, null, 122, 128, 122, 128, 32, 33, true, "Nature", "Nature"], ["reference", "volume", 14523797031010145779, "TEXT", "#", 1.0, 8104407864682872540, 17344131718252767312, null, null, 129, 136, 129, 136, 33, 36, true, "410, 63", "410, 63"], ["reference", "date", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310666, null, null, 137, 144, 137, 144, 36, 40, true, "2001", "(2001)."]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text_hash": 18067349248114064711, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["reference", "reference-number", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961154, null, null, 0, 3, 0, 3, 0, 3, true, "9", "[9]"], ["reference", "authors", 4183773491823524238, "TEXT", "#", 1.0, 18273863669034285590, 7628635609201023314, null, null, 4, 123, 4, 122, 3, 46, true, "E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl", "E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl,"], ["reference", "title", 4183773491823524238, "TEXT", "#", 1.0, 11765854581783747448, 17959576961439518781, null, null, 124, 205, 123, 204, 46, 58, true, "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si", "Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si,"], ["reference", "journal", 4183773491823524238, "TEXT", "#", 1.0, 12289997722495770339, 2364683881599912423, null, null, 206, 222, 205, 221, 58, 64, true, "Phys. Rev. Lett", "Phys. Rev. Lett."], ["reference", "volume", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610503, null, null, 223, 226, 222, 225, 64, 66, true, "92", "92,"], ["reference", "pages", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, null, null, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["reference", "date", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477263, null, null, 234, 241, 233, 240, 67, 71, true, "2004", "(2004)."]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text_hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/semantics.nlp.jsonl b/tests/data/texts/semantics.nlp.jsonl index 821624cb..441b861a 100644 --- a/tests/data/texts/semantics.nlp.jsonl +++ b/tests/data/texts/semantics.nlp.jsonl @@ -1,7 +1,7 @@ -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 7759316032128614217, "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", "header", 0.7099999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text-hash": 11303007895399162817, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14339562343989983509, "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", "meta-data", 0.800000011920929]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text-hash": 17380979703907035493, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 18143996061359107703, "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "IBM Research, Rueschlikon, Switzerland", "text-hash": 3204757815416943811, "type": "text"} -{"applied-models": ["link", "numval"], "dloc": "", "hash": 11035282656876697300, "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", "meta-data", 1.0]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "ABSTRACT", "text-hash": 14650435066888584228, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14993488697470108654, "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, 18446744073709551615, 18446744073709551615, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, 18446744073709551615, 18446744073709551615, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", "text", 0.9599999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text-hash": 164218115435155290, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 14523797031010145779, "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, 18446744073709551615, 18446744073709551615, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, 18446744073709551615, 18446744073709551615, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, 18446744073709551615, 18446744073709551615, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, 18446744073709551615, 18446744073709551615, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", "reference", 0.8899999856948853]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text-hash": 18067349248114064711, "type": "text"} -{"applied-models": ["link", "numval", "semantic"], "dloc": "", "hash": 4183773491823524238, "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, 18446744073709551615, 18446744073709551615, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, 18446744073709551615, 18446744073709551615, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, 18446744073709551615, 18446744073709551615, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, 18446744073709551615, 18446744073709551615, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, 18446744073709551615, 18446744073709551615, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", "reference", 0.9399999976158142]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text-hash": 7798907214565353722, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "properties": {"data": [["semantic", 7759316032128614217, "TEXT", "#", "reference", 0.48]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 7759316032128614217, "text": "Corpus Conversion Service: A Machine Learning Platform to Ingest Documents at Scale.", "text_hash": 11303007895399162817, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "properties": {"data": [["semantic", 14339562343989983509, "TEXT", "#", "meta-data", 0.88]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14339562343989983509, "text": "Peter W J Staar, Michele Dolfi, Christoph Auer, Costas Bekas", "text_hash": 17380979703907035493, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "IBM Research, Rueschlikon, Switzerland", "properties": {"data": [["semantic", 18143996061359107703, "TEXT", "#", "meta-data", 0.71]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 18143996061359107703, "text": "IBM Research, Rueschlikon, Switzerland", "text_hash": 3204757815416943811, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "ABSTRACT", "properties": {"data": [["semantic", 11035282656876697300, "TEXT", "#", "header", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 11035282656876697300, "text": "ABSTRACT", "text_hash": 14650435066888584228, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 15441160910541481353, 16442221201258166387, null, null, 447, 449, 447, 449, 81, 82, true, "99", "99"], ["numval", "ival", 14993488697470108654, "TEXT", "#", 1.0, 12178341415896436703, 785115088598742882, null, null, 599, 602, 599, 602, 106, 107, true, "250", "250"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "properties": {"data": [["semantic", 14993488697470108654, "TEXT", "#", "text", 1.0]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14993488697470108654, "text": "We will show that each of the modules is scalable due to an asynchronous microservice architecture and can therefore handle massive amounts of documents. Furthermore, we will show that our capability to gather ground-truth is accelerated by machine-learning algorithms by at least one order of magnitude. This allows us to both gather large amounts of ground-truth in very little time and obtain very good precision/recall metrics in the range of 99% with regard to content conversion to structured output. The CCS platform is currently deployed on IBM internal infrastructure and serving more than 250 active users for knowledge-engineering project engagements.", "text_hash": 164218115435155290, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 17767354399704235161, 10322896225031576180, null, null, 1, 2, 1, 2, 1, 2, true, "1", "1"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481728, 16379900111711101126, null, null, 95, 97, 95, 97, 26, 27, true, "39", "39"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 12178341415896306520, 238001515004691493, null, null, 129, 132, 129, 132, 33, 34, true, "410", "410"], ["numval", "ival", 14523797031010145779, "TEXT", "#", 1.0, 15441160910541481008, 16379900220609196175, null, null, 134, 136, 134, 136, 35, 36, true, "63", "63"], ["numval", "year", 14523797031010145779, "TEXT", "#", 1.0, 389609625548757414, 10839581444433310597, null, null, 138, 142, 138, 142, 37, 38, true, "2001", "2001"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "properties": {"data": [["semantic", 14523797031010145779, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 14523797031010145779, "text": "[1] J. Nagamatsu, N. Nakagawa, T. Muranaka, Y. Zenitani, and J. Akimitsu, Superconductivity at 39K in magnesium diboride, Nature 410, 63 (2001).", "text_hash": 18067349248114064711, "type": "text"} +{"applied_models": ["link", "numval", "semantic"], "dloc": "#", "instances": {"data": [["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235153, 9792860093610961218, null, null, 1, 2, 1, 2, 1, 2, true, "9", "9"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 17767354399704235163, 9792860093755571418, null, null, 201, 202, 200, 201, 55, 56, true, "3", "3"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 15441160910541481458, 17735156534724610520, null, null, 223, 225, 222, 224, 64, 65, true, "92", "92"], ["numval", "ival", 4183773491823524238, "TEXT", "#", 1.0, 16380805713199014127, 12790542105476230142, null, null, 227, 233, 226, 232, 66, 67, true, "027003", "027003"], ["numval", "year", 4183773491823524238, "TEXT", "#", 1.0, 389609625548757411, 2957403781943477324, null, null, 235, 239, 234, 238, 68, 69, true, "2004", "2004"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "properties": {"data": [["semantic", 4183773491823524238, "TEXT", "#", "reference", 0.99]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4183773491823524238, "text": "[9] E. Bauer, G. Hilscher, H. Michor, C. Paul, E. W. Scheidt, A. Gribanov, Y. Seropegin, H. No\u00ebl, M. Sigrist, and P. Rogl, Heavy fermion superconductivity and magnetic order in noncentrosymmetric CePt3Si, Phys. Rev. Lett. 92, 027003 (2004).", "text_hash": 7798907214565353722, "type": "text"} diff --git a/tests/data/texts/terms.nlp.jsonl b/tests/data/texts/terms.nlp.jsonl index c42f0ffa..6975eda1 100644 --- a/tests/data/texts/terms.nlp.jsonl +++ b/tests/data/texts/terms.nlp.jsonl @@ -1,2 +1,2 @@ -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 9818235231875948258, "instances": {"data": [["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, 18446744073709551615, 18446744073709551615, 0, 177, 0, 164, 0, 35, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, 18446744073709551615, 18446744073709551615, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, 18446744073709551615, 18446744073709551615, 7, 31, 7, 26, 1, 7, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, 18446744073709551615, 18446744073709551615, 16, 26, 16, 23, 4, 5, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, 18446744073709551615, 18446744073709551615, 27, 30, 24, 25, 5, 6, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, 18446744073709551615, 18446744073709551615, 48, 63, 43, 58, 10, 12, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, 18446744073709551615, 18446744073709551615, 64, 122, 59, 109, 12, 22, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, 18446744073709551615, 18446744073709551615, 73, 95, 68, 88, 15, 17, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, 18446744073709551615, 18446744073709551615, 96, 106, 89, 98, 17, 19, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, 18446744073709551615, 18446744073709551615, 107, 121, 99, 108, 19, 21, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, 18446744073709551615, 18446744073709551615, 123, 127, 110, 114, 23, 26, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, 18446744073709551615, 18446744073709551615, 124, 126, 111, 113, 24, 25, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, 18446744073709551615, 18446744073709551615, 128, 130, 115, 117, 26, 27, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, 18446744073709551615, 18446744073709551615, 133, 140, 120, 127, 28, 29, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, 18446744073709551615, 18446744073709551615, 141, 158, 128, 145, 29, 31, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, 18446744073709551615, 18446744073709551615, 159, 161, 146, 148, 31, 32, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, 18446744073709551615, 18446744073709551615, 162, 176, 149, 163, 32, 34, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, 18446744073709551615, 18446744073709551615, 170, 176, 157, 163, 33, 34, true, "Europe", "Europe"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, 18446744073709551615, 18446744073709551615, 178, 375, 165, 362, 35, 67, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, 18446744073709551615, 18446744073709551615, 186, 194, 173, 181, 37, 38, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, 18446744073709551615, 18446744073709551615, 195, 211, 182, 198, 38, 40, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, 18446744073709551615, 18446744073709551615, 204, 227, 191, 214, 39, 42, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, 18446744073709551615, 18446744073709551615, 216, 227, 203, 214, 41, 42, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, 18446744073709551615, 18446744073709551615, 228, 234, 215, 221, 42, 44, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, 18446744073709551615, 18446744073709551615, 235, 243, 222, 230, 44, 45, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16827399947339178045, 496128657873109341, 18446744073709551615, 18446744073709551615, 252, 293, 239, 280, 47, 53, true, "Atlantic, Pacific and Indian oceans,[XII]", "Atlantic, Pacific and Indian oceans,[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, 18446744073709551615, 18446744073709551615, 252, 260, 239, 247, 47, 48, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, 18446744073709551615, 18446744073709551615, 262, 269, 249, 256, 49, 50, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 3575373331357445963, 1702692810903063225, 18446744073709551615, 18446744073709551615, 274, 293, 261, 280, 51, 53, true, "Indian oceans,[XII]", "Indian oceans,[XII]"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, 18446744073709551615, 18446744073709551615, 281, 293, 268, 280, 52, 53, true, "oceans,[XII]", "oceans,[XII]"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 16381206561323757770, 14007677850696664277, 18446744073709551615, 18446744073709551615, 294, 300, 281, 287, 53, 54, true, "giving", "giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, 18446744073709551615, 18446744073709551615, 308, 314, 295, 301, 56, 58, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, 18446744073709551615, 18446744073709551615, 315, 361, 302, 348, 58, 63, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, 18446744073709551615, 18446744073709551615, 362, 368, 349, 355, 63, 65, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, 18446744073709551615, 18446744073709551615, 369, 374, 356, 361, 65, 66, true, "world", "world"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, 18446744073709551615, 18446744073709551615, 376, 637, 363, 624, 67, 118, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, 18446744073709551615, 18446744073709551615, 376, 410, 363, 397, 67, 71, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, 18446744073709551615, 18446744073709551615, 389, 395, 376, 382, 68, 69, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, 18446744073709551615, 18446744073709551615, 411, 415, 398, 402, 71, 72, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, 18446744073709551615, 18446744073709551615, 416, 438, 403, 425, 72, 75, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, 18446744073709551615, 18446744073709551615, 416, 423, 403, 410, 72, 73, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, 18446744073709551615, 18446744073709551615, 428, 438, 415, 425, 74, 75, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, 18446744073709551615, 18446744073709551615, 439, 445, 426, 432, 75, 77, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, 18446744073709551615, 18446744073709551615, 446, 451, 433, 438, 77, 78, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, 18446744073709551615, 18446744073709551615, 453, 460, 440, 447, 79, 80, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, 18446744073709551615, 18446744073709551615, 461, 467, 448, 454, 80, 82, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, 18446744073709551615, 18446744073709551615, 480, 491, 467, 478, 85, 86, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, 18446744073709551615, 18446744073709551615, 492, 498, 479, 485, 86, 88, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, 18446744073709551615, 18446744073709551615, 505, 521, 492, 508, 90, 93, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, 18446744073709551615, 18446744073709551615, 505, 510, 492, 497, 90, 91, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, 18446744073709551615, 18446744073709551615, 515, 521, 502, 508, 92, 93, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, 18446744073709551615, 18446744073709551615, 522, 528, 509, 515, 93, 95, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, 18446744073709551615, 18446744073709551615, 541, 558, 528, 545, 98, 101, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, 18446744073709551615, 18446744073709551615, 541, 548, 528, 535, 98, 99, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, 18446744073709551615, 18446744073709551615, 553, 558, 540, 545, 100, 101, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, 18446744073709551615, 18446744073709551615, 559, 565, 546, 552, 101, 103, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, 18446744073709551615, 18446744073709551615, 566, 571, 553, 558, 103, 104, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, 18446744073709551615, 18446744073709551615, 579, 594, 566, 581, 107, 109, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, 18446744073709551615, 18446744073709551615, 595, 603, 582, 590, 109, 111, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, 18446744073709551615, 18446744073709551615, 604, 618, 591, 605, 111, 113, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, 18446744073709551615, 18446744073709551615, 619, 625, 606, 612, 113, 115, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, 18446744073709551615, 18446744073709551615, 626, 636, 613, 623, 115, 117, true, "north west", "north west"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, 18446744073709551615, 18446744073709551615, 638, 961, 625, 948, 118, 176, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, 18446744073709551615, 18446744073709551615, 642, 659, 629, 646, 119, 121, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, 18446744073709551615, 18446744073709551615, 660, 667, 647, 654, 121, 122, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, 18446744073709551615, 18446744073709551615, 668, 676, 655, 663, 122, 124, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, 18446744073709551615, 18446744073709551615, 677, 682, 664, 669, 124, 125, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, 18446744073709551615, 18446744073709551615, 683, 689, 670, 676, 125, 127, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, 18446744073709551615, 18446744073709551615, 690, 704, 677, 691, 127, 129, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, 18446744073709551615, 18446744073709551615, 709, 717, 696, 704, 130, 132, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, 18446744073709551615, 18446744073709551615, 718, 735, 705, 722, 132, 134, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, 18446744073709551615, 18446744073709551615, 736, 742, 723, 729, 134, 136, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, 18446744073709551615, 18446744073709551615, 743, 758, 730, 745, 136, 138, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, 18446744073709551615, 18446744073709551615, 767, 776, 754, 763, 140, 142, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, 18446744073709551615, 18446744073709551615, 778, 798, 765, 785, 143, 145, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, 18446744073709551615, 18446744073709551615, 799, 806, 786, 793, 145, 146, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, 18446744073709551615, 18446744073709551615, 807, 820, 794, 807, 146, 148, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, 18446744073709551615, 18446744073709551615, 821, 823, 808, 810, 148, 149, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, 18446744073709551615, 18446744073709551615, 824, 864, 811, 851, 149, 156, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, 18446744073709551615, 18446744073709551615, 824, 837, 811, 824, 149, 151, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, 18446744073709551615, 18446744073709551615, 839, 851, 826, 838, 152, 154, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, 18446744073709551615, 18446744073709551615, 856, 864, 843, 851, 155, 156, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, 18446744073709551615, 18446744073709551615, 865, 871, 852, 858, 156, 158, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, 18446744073709551615, 18446744073709551615, 872, 886, 859, 873, 158, 160, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, 18446744073709551615, 18446744073709551615, 892, 910, 879, 897, 162, 165, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, 18446744073709551615, 18446744073709551615, 916, 928, 903, 915, 167, 169, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, 18446744073709551615, 18446744073709551615, 929, 931, 916, 918, 169, 170, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, 18446744073709551615, 18446744073709551615, 932, 939, 919, 926, 170, 171, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, 18446744073709551615, 18446744073709551615, 948, 960, 935, 947, 173, 175, true, "Indian Ocean", "Indian Ocean"], ["sentence", "", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, 18446744073709551615, 18446744073709551615, 962, 1384, 949, 1371, 176, 254, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, 18446744073709551615, 18446744073709551615, 966, 991, 953, 978, 177, 180, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, 18446744073709551615, 18446744073709551615, 992, 1020, 979, 1007, 180, 187, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, 18446744073709551615, 18446744073709551615, 998, 1000, 985, 987, 182, 183, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, 18446744073709551615, 18446744073709551615, 1007, 1019, 994, 1006, 184, 186, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, 18446744073709551615, 18446744073709551615, 1021, 1025, 1008, 1012, 187, 188, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, 18446744073709551615, 18446744073709551615, 1028, 1036, 1015, 1023, 189, 190, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, 18446744073709551615, 18446744073709551615, 1037, 1041, 1024, 1028, 190, 191, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, 18446744073709551615, 18446744073709551615, 1042, 1044, 1029, 1031, 191, 192, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, 18446744073709551615, 18446744073709551615, 1045, 1052, 1032, 1039, 192, 193, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, 18446744073709551615, 18446744073709551615, 1053, 1056, 1040, 1043, 193, 194, true, "km2", "km2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, 18446744073709551615, 18446744073709551615, 1057, 1072, 1044, 1059, 194, 199, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, 18446744073709551615, 18446744073709551615, 1058, 1065, 1045, 1052, 195, 196, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, 18446744073709551615, 18446744073709551615, 1066, 1071, 1053, 1058, 196, 198, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, 18446744073709551615, 18446744073709551615, 1077, 1081, 1064, 1068, 200, 201, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, 18446744073709551615, 18446744073709551615, 1084, 1100, 1071, 1087, 202, 204, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, 18446744073709551615, 18446744073709551615, 1101, 1103, 1088, 1090, 204, 205, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, 18446744073709551615, 18446744073709551615, 1104, 1108, 1091, 1095, 205, 206, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, 18446744073709551615, 18446744073709551615, 1109, 1111, 1096, 1098, 206, 207, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, 18446744073709551615, 18446744073709551615, 1112, 1119, 1099, 1106, 207, 208, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, 18446744073709551615, 18446744073709551615, 1120, 1122, 1107, 1109, 208, 209, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, 18446744073709551615, 18446744073709551615, 1123, 1125, 1110, 1112, 209, 210, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, 18446744073709551615, 18446744073709551615, 1126, 1133, 1113, 1120, 210, 211, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, 18446744073709551615, 18446744073709551615, 1134, 1145, 1121, 1132, 211, 212, true, "2023.[5][8]", "2023.[5][8]"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, 18446744073709551615, 18446744073709551615, 1146, 1152, 1133, 1139, 212, 213, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, 18446744073709551615, 18446744073709551615, 1153, 1155, 1140, 1142, 213, 214, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 101756270285429158, 6309445736017161690, 18446744073709551615, 18446744073709551615, 1158, 1192, 1145, 1179, 215, 218, true, "unitary semi-presidential republic", "unitary semi-presidential republic"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, 18446744073709551615, 18446744073709551615, 1166, 1183, 1153, 1170, 216, 217, true, "semi-presidential", "semi-presidential"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, 18446744073709551615, 18446744073709551615, 1193, 1197, 1180, 1184, 218, 219, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, 18446744073709551615, 18446744073709551615, 1202, 1209, 1189, 1196, 220, 221, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, 18446744073709551615, 18446744073709551615, 1210, 1212, 1197, 1199, 221, 222, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, 18446744073709551615, 18446744073709551615, 1213, 1218, 1200, 1205, 222, 223, true, "Paris", "Paris"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7060736712546470087, 14254659311922306724, 18446744073709551615, 18446744073709551615, 1224, 1246, 1211, 1233, 225, 228, true, "countrys largest city", "country's largest city"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, 18446744073709551615, 18446744073709551615, 1224, 1233, 1211, 1220, 225, 226, true, "countrys", "country's"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, 18446744073709551615, 18446744073709551615, 1251, 1286, 1238, 1273, 229, 234, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, 18446744073709551615, 18446744073709551615, 1269, 1286, 1256, 1273, 232, 234, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, 18446744073709551615, 18446744073709551615, 1288, 1311, 1275, 1298, 235, 239, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, 18446744073709551615, 18446744073709551615, 1312, 1319, 1299, 1306, 239, 240, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, 18446744073709551615, 18446744073709551615, 1320, 1383, 1307, 1370, 240, 253, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, 18446744073709551615, 18446744073709551615, 1320, 1329, 1307, 1316, 240, 241, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, 18446744073709551615, 18446744073709551615, 1331, 1335, 1318, 1322, 242, 243, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, 18446744073709551615, 18446744073709551615, 1337, 1345, 1324, 1332, 244, 245, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, 18446744073709551615, 18446744073709551615, 1347, 1352, 1334, 1339, 246, 247, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, 18446744073709551615, 18446744073709551615, 1354, 1362, 1341, 1349, 248, 249, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, 18446744073709551615, 18446744073709551615, 1364, 1374, 1351, 1361, 250, 251, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, 18446744073709551615, 18446744073709551615, 1379, 1383, 1366, 1370, 252, 253, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", "en", 0.9300000071525574], ["semantic", "text", 0.9599999785423279]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text-hash": 13399504000106611798, "type": "text"} -{"applied-models": ["cite", "conn", "expression", "geoloc", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "", "hash": 4522339299074192207, "instances": {"data": [["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, 18446744073709551615, 18446744073709551615, 0, 188, 0, 188, 0, 28, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, 18446744073709551615, 18446744073709551615, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, 18446744073709551615, 18446744073709551615, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, 18446744073709551615, 18446744073709551615, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004679635976, 18446744073709551615, 18446744073709551615, 24, 41, 24, 41, 5, 7, true, "interband pairing", "interband pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, 18446744073709551615, 18446744073709551615, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["term", "enum-term-mark-1", 4522339299074192207, "TEXT", "#", 1.0, 18178792033664231045, 5215905145529509301, 18446744073709551615, 18446744073709551615, 45, 87, 45, 87, 8, 13, true, "two-band s-wave and d-wave superconductors", "two-band s-wave and d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, 18446744073709551615, 18446744073709551615, 45, 53, 45, 53, 8, 9, true, "two-band", "two-band"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, 18446744073709551615, 18446744073709551615, 54, 60, 54, 60, 9, 10, true, "s-wave", "s-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15865120430118694837, 607662791561950043, 18446744073709551615, 18446744073709551615, 65, 87, 65, 87, 11, 13, true, "d-wave superconductors", "d-wave superconductors"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, 18446744073709551615, 18446744073709551615, 65, 71, 65, 71, 11, 12, true, "d-wave", "d-wave"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, 18446744073709551615, 18446744073709551615, 88, 92, 88, 92, 13, 14, true, "with", "with"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7066208506210013514, 1315102098090612032, 18446744073709551615, 18446744073709551615, 93, 105, 93, 105, 14, 16, true, "D4h symmetry", "D4h symmetry"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, 18446744073709551615, 18446744073709551615, 93, 96, 93, 96, 14, 15, true, "D4h", "D4h"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, 18446744073709551615, 18446744073709551615, 106, 113, 106, 113, 16, 18, true, "in both", "in both"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5172475826427571765, 16752879714615995236, 18446744073709551615, 18446744073709551615, 114, 137, 114, 137, 18, 20, true, "time-reversal invariant", "time-reversal invariant"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, 18446744073709551615, 18446744073709551615, 114, 127, 114, 127, 18, 19, true, "time-reversal", "time-reversal"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, 18446744073709551615, 18446744073709551615, 146, 148, 146, 148, 22, 23, true, "as", "as"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 10193294999568911218, 6331719907444433820, 18446744073709551615, 18446744073709551615, 149, 171, 149, 171, 23, 25, true, "time-reversal symmetry", "time-reversal symmetry"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, 18446744073709551615, 18446744073709551615, 149, 162, 149, 162, 23, 24, true, "time-reversal", "time-reversal"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, 18446744073709551615, 18446744073709551615, 172, 180, 172, 180, 25, 26, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, 18446744073709551615, 18446744073709551615, 181, 187, 181, 187, 26, 27, true, "states", "states"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, 18446744073709551615, 18446744073709551615, 189, 384, 189, 384, 28, 58, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, 18446744073709551615, 18446744073709551615, 193, 201, 193, 201, 29, 30, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, 18446744073709551615, 18446744073709551615, 202, 204, 202, 204, 30, 31, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, 18446744073709551615, 18446744073709551615, 205, 214, 205, 214, 31, 32, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, 18446744073709551615, 18446744073709551615, 215, 244, 215, 244, 32, 35, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, 18446744073709551615, 18446744073709551615, 249, 264, 249, 264, 36, 38, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, 18446744073709551615, 18446744073709551615, 265, 271, 265, 271, 38, 40, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, 18446744073709551615, 18446744073709551615, 272, 286, 272, 286, 40, 41, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, 18446744073709551615, 18446744073709551615, 288, 293, 288, 293, 42, 43, true, "nodes", "nodes"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 3766089650286616147, 5895288868427388531, 18446744073709551615, 18446744073709551615, 294, 309, 294, 309, 43, 45, true, "can (dis)appear", "can (dis)appear"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, 18446744073709551615, 18446744073709551615, 298, 309, 298, 309, 44, 45, true, "(dis)appear", "(dis)appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, 18446744073709551615, 18446744073709551615, 311, 316, 311, 316, 46, 47, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, 18446744073709551615, 18446744073709551615, 322, 327, 322, 327, 49, 50, true, "leave", "leave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106670696871780136, 17807492235586576248, 18446744073709551615, 18446744073709551615, 328, 351, 328, 351, 50, 52, true, "high-symmetry locations", "high-symmetry locations"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, 18446744073709551615, 18446744073709551615, 328, 341, 328, 341, 50, 51, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, 18446744073709551615, 18446744073709551615, 357, 374, 357, 374, 53, 55, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, 18446744073709551615, 18446744073709551615, 375, 383, 375, 383, 55, 57, true, "is tuned", "is tuned"], ["sentence", "", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, 18446744073709551615, 18446744073709551615, 385, 594, 385, 594, 58, 93, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, 18446744073709551615, 18446744073709551615, 398, 404, 398, 404, 60, 62, true, "in the", "in the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15559186615879240368, 12910915472651789195, 18446744073709551615, 18446744073709551615, 405, 416, 405, 416, 62, 64, true, "d-wave case", "d-wave case"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, 18446744073709551615, 18446744073709551615, 405, 411, 405, 411, 62, 63, true, "d-wave", "d-wave"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, 18446744073709551615, 18446744073709551615, 421, 425, 421, 425, 66, 67, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, 18446744073709551615, 18446744073709551615, 426, 430, 426, 430, 67, 68, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, 18446744073709551615, 18446744073709551615, 440, 454, 440, 454, 70, 72, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, 18446744073709551615, 18446744073709551615, 455, 475, 455, 475, 72, 74, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, 18446744073709551615, 18446744073709551615, 481, 490, 481, 490, 75, 76, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, 18446744073709551615, 18446744073709551615, 491, 498, 491, 498, 76, 77, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, 18446744073709551615, 18446744073709551615, 499, 508, 499, 508, 77, 78, true, "increases", "increases"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 18352755674675419019, 8051640294707098683, 18446744073709551615, 18446744073709551615, 510, 547, 510, 547, 79, 84, true, "flat zero-energy Andreev bound states", "flat zero-energy Andreev bound states"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, 18446744073709551615, 18446744073709551615, 515, 526, 515, 526, 80, 81, true, "zero-energy", "zero-energy"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, 18446744073709551615, 18446744073709551615, 548, 555, 548, 555, 84, 86, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, 18446744073709551615, 18446744073709551615, 560, 570, 560, 570, 87, 88, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, 18446744073709551615, 18446744073709551615, 571, 573, 571, 573, 88, 89, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, 18446744073709551615, 18446744073709551615, 574, 593, 574, 593, 89, 92, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", "en", 0.8799999952316284], ["semantic", "text", 0.9900000095367432]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text-hash": 7455828584320671675, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 7165733783736451605, 9933574393783992989, null, null, 0, 177, 0, 164, 0, 37, true, "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe.", "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe."], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949801923, null, null, 0, 6, 0, 6, 0, 1, true, "France", "France"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 3013851222087677827, 2365012408510787722, null, null, 7, 31, 7, 26, 1, 9, true, "(French: [f\u0281\u0251\u0303s] \u24d8)", "(French: [f\u0281\u0251\u0303s] \u24d8)"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 8106352768017183538, 14135021865049995092, null, null, 16, 26, 16, 23, 4, 7, true, "[f\u0281\u0251\u0303s]", "[f\u0281\u0251\u0303s]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206575305750373, 3269040892355287555, null, null, 16, 25, 16, 22, 4, 6, true, "[f\u0281\u0251\u0303s", "[f\u0281\u0251\u0303s"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704340336, 2654092909150552370, null, null, 27, 30, 24, 25, 7, 8, true, "\u24d8", "\u24d8"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17441062468440299130, 1252048624247041617, null, null, 48, 63, 43, 58, 12, 14, true, "French Republic", "French Republic"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 11356497368310893887, 13708671681789009535, null, null, 64, 122, 59, 109, 14, 24, true, "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])", "(French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z])"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9222317529736412633, 13157151896249885007, null, null, 73, 95, 68, 88, 17, 19, true, "R\u00e9publique fran\u00e7aise", "R\u00e9publique fran\u00e7aise"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 3505666090650518630, 15438411233664829842, null, null, 96, 106, 89, 98, 19, 21, true, "[\u0281epyblik", "[\u0281epyblik"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6171719307028286686, 2027669270476122887, null, null, 107, 121, 99, 108, 21, 23, true, "f\u0281\u0251\u0303s\u025b\u02d0z]", "f\u0281\u0251\u0303s\u025b\u02d0z]"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 389609625697295964, 4819984163543340016, null, null, 123, 127, 110, 114, 25, 28, true, "[14]", "[14]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481978, 16516418858946608100, null, null, 124, 126, 111, 113, 26, 27, true, "14", "14"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154113823853, null, null, 128, 130, 115, 117, 28, 29, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263937425, null, null, 133, 140, 120, 127, 30, 31, true, "country", "country"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 13076166426216861763, 8486882507226708300, null, null, 141, 158, 128, 145, 31, 33, true, "located primarily", "located primarily"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320546026, null, null, 159, 161, 146, 148, 33, 34, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6634671142799218620, 10150276053554071667, null, null, 162, 176, 149, 163, 34, 36, true, "Western Europe", "Western Europe"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 16381206541025400639, 3132305590202304515, null, null, 170, 176, 157, 163, 35, 36, true, "Europe", "Europe"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 6189739574856989794, 5347129219762274320, null, null, 178, 375, 165, 362, 37, 73, true, "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world.", "It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world."], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14637951607890754969, 402968920972442625, null, null, 186, 194, 173, 181, 39, 40, true, "includes", "includes"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8894305605935208252, 12062948095316684045, null, null, 195, 211, 182, 198, 40, 42, true, "overseas regions", "overseas regions"], ["term", "enum-term-mark-3", 9818235231875948258, "TEXT", "#", 1.0, 15716219910512026318, 10134046109933299907, null, null, 204, 227, 191, 214, 41, 44, true, "regions and territories", "regions and territories"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 742108606525961391, 301790709556208243, null, null, 216, 227, 203, 214, 43, 44, true, "territories", "territories"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342249879, null, null, 228, 234, 215, 221, 44, 46, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650324458704782736, 10702486193743709015, null, null, 235, 243, 222, 230, 46, 47, true, "Americas", "Americas"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 820203855428083856, 16279894764651307170, null, null, 252, 280, 239, 267, 49, 54, true, "Atlantic, Pacific and Indian", "Atlantic, Pacific and Indian"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650294626349057313, 15914513546830396825, null, null, 252, 260, 239, 247, 49, 50, true, "Atlantic", "Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106352733874071343, 14751516024473840502, null, null, 262, 269, 249, 256, 51, 52, true, "Pacific", "Pacific"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 4553045173532721202, 17291436396596241777, null, null, 274, 287, 261, 274, 53, 55, true, "Indian oceans", "Indian oceans"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 1756733593034042776, 17602961118336296345, null, null, 281, 293, 268, 280, 54, 59, true, "oceans,[XII]", "oceans,[XII]"], ["parenthesis", "square brackets", 9818235231875948258, "TEXT", "#", 1.0, 329104147687597164, 12284735790511259080, null, null, 288, 293, 275, 280, 56, 59, true, "[XII]", "[XII]"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895542235, 10796895691287030884, null, null, 289, 292, 276, 279, 57, 58, true, "XII", "XII"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 14650940714797320124, 6236592394333508229, null, null, 292, 300, 279, 287, 58, 60, true, "] giving", "] giving"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206565712212855, 1236325873132826249, null, null, 308, 314, 295, 301, 62, 64, true, "of the", "of the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14052688401474323454, 13690370747401099164, null, null, 315, 361, 302, 348, 64, 69, true, "largest discontiguous exclusive economic zones", "largest discontiguous exclusive economic zones"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342127289, null, null, 362, 368, 349, 355, 69, 71, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161607326646, 1454068451125029934, null, null, 369, 374, 356, 361, 71, 72, true, "world", "world"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 14713286702685564143, 12342897629493115066, null, null, 376, 637, 363, 624, 73, 124, true, "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west.", "Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8020022223670320918, 1087396221906448864, null, null, 376, 410, 363, 397, 73, 77, true, "Metropolitan France shares borders", "Metropolitan France shares borders"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949348450, null, null, 389, 395, 376, 382, 74, 75, true, "France", "France"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648605737316, null, null, 411, 415, 398, 402, 77, 78, true, "with", "with"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 16696858386959013905, 9953713563101765953, null, null, 416, 438, 403, 425, 78, 81, true, "Belgium and Luxembourg", "Belgium and Luxembourg"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351528276606806, 17452206963477359672, null, null, 416, 423, 403, 410, 78, 79, true, "Belgium", "Belgium"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1406869670274782120, 680628993648520530, null, null, 428, 438, 415, 425, 80, 81, true, "Luxembourg", "Luxembourg"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969337213, null, null, 439, 445, 426, 432, 81, 83, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161758950314, 2918999025889257964, null, null, 446, 451, 433, 438, 83, 84, true, "north", "north"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106351570048323596, 17557988429899748833, null, null, 453, 460, 440, 447, 85, 86, true, "Germany", "Germany"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969336735, null, null, 461, 467, 448, 454, 86, 88, true, "to the", "to the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2664439525053388608, 11193616686634147618, null, null, 480, 491, 467, 478, 91, 92, true, "Switzerland", "Switzerland"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969308714, null, null, 492, 498, 479, 485, 92, 94, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 4354215944273037694, 5682028639051353372, null, null, 505, 521, 492, 508, 96, 99, true, "Italy and Monaco", "Italy and Monaco"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162355748898, 1482575002715610334, null, null, 505, 510, 492, 497, 96, 97, true, "Italy", "Italy"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16381206560459902527, 14799408677019156812, null, null, 515, 521, 502, 508, 98, 99, true, "Monaco", "Monaco"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310818, null, null, 522, 528, 509, 515, 99, 101, true, "to the", "to the"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 12159164131217588284, 4955957401478532251, null, null, 541, 558, 528, 545, 104, 107, true, "Andorra and Spain", "Andorra and Spain"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106479274243514347, 17980360239699861283, null, null, 541, 548, 528, 535, 104, 105, true, "Andorra", "Andorra"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162342370538, 1482633785259993559, null, null, 553, 558, 540, 545, 106, 107, true, "Spain", "Spain"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969310071, null, null, 559, 565, 546, 552, 107, 109, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161786112263, 1509683392823934352, null, null, 566, 571, 553, 558, 109, 110, true, "south", "south"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1007413068724892642, 291489006120572005, null, null, 579, 594, 566, 581, 113, 115, true, "maritime border", "maritime border"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14638857868319795209, 11791522442449061322, null, null, 595, 603, 582, 590, 115, 117, true, "with the", "with the"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17782056979161528852, 9153048661633494047, null, null, 604, 618, 591, 605, 117, 119, true, "United Kingdom", "United Kingdom"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969317320, null, null, 619, 625, 606, 612, 119, 121, true, "to the", "to the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13933284241117180316, 9549244500258880510, null, null, 626, 636, 613, 623, 121, 123, true, "north west", "north west"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 17003561248590084050, 7083138465016524650, null, null, 638, 961, 625, 948, 124, 182, true, "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean.", "Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 16902286799032688327, 9492031817564827183, null, null, 642, 659, 629, 646, 125, 127, true, "metropolitan area", "metropolitan area"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106397490080681192, 5136131594957919962, null, null, 660, 667, 647, 654, 127, 128, true, "extends", "extends"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469296563, null, null, 668, 676, 655, 663, 128, 130, true, "from the", "from the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162172676793, 1498342144318401380, null, null, 677, 682, 664, 669, 130, 131, true, "Rhine", "Rhine"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969321048, null, null, 683, 689, 670, 676, 131, 133, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1699059536281862869, 17597688446806609953, null, null, 690, 704, 677, 691, 133, 135, true, "Atlantic Ocean", "Atlantic Ocean"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 14637917359887717745, 5728505801469293615, null, null, 709, 717, 696, 704, 136, 138, true, "from the", "from the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2292074113456689375, 10918321493483037973, null, null, 718, 735, 705, 722, 138, 140, true, "Mediterranean Sea", "Mediterranean Sea"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206519425733256, 3048986274969290019, null, null, 736, 742, 723, 729, 140, 142, true, "to the", "to the"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 795096431028441229, 12158077684056403648, null, null, 743, 758, 730, 745, 142, 144, true, "English Channel", "English Channel"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 2906594566132974813, 13737227933071728015, null, null, 767, 776, 754, 763, 146, 148, true, "North Sea", "North Sea"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 7078461255531831470, 4201254213649319275, null, null, 778, 798, 765, 785, 149, 151, true, "overseas territories", "overseas territories"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709326671, null, null, 799, 806, 786, 793, 151, 152, true, "include", "include"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1396147880648722105, 6206290065458304556, null, null, 807, 820, 794, 807, 152, 154, true, "French Guiana", "French Guiana"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320786848, null, null, 821, 823, 808, 810, 154, 155, true, "in", "in"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 10895480552512041513, 12515333245813396531, null, null, 824, 864, 811, 851, 155, 162, true, "South America, Saint Pierre and Miquelon", "South America, Saint Pierre and Miquelon"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1534386675771170432, 5620829662395863596, null, null, 824, 837, 811, 824, 155, 157, true, "South America", "South America"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 17937693740200172107, 3021880859266664417, null, null, 839, 851, 826, 838, 158, 160, true, "Saint Pierre", "Saint Pierre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650310996981700862, 12468752396436869924, null, null, 856, 864, 843, 851, 161, 162, true, "Miquelon", "Miquelon"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 16381206560518651853, 14773710306342095353, null, null, 865, 871, 852, 858, 162, 164, true, "in the", "in the"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15250972217703672587, 512438848472377060, null, null, 872, 886, 859, 873, 164, 166, true, "North Atlantic", "North Atlantic"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 11554653182817214443, 15552313849565549382, null, null, 892, 910, 879, 897, 168, 171, true, "French West Indies", "French West Indies"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 10254605917578642058, 14582149795939180163, null, null, 916, 928, 903, 915, 173, 175, true, "many islands", "many islands"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320793637, null, null, 929, 931, 916, 918, 175, 176, true, "in", "in"], ["geoloc", "continent", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106340997491787657, 13345472904677262792, null, null, 932, 939, 919, 926, 176, 177, true, "Oceania", "Oceania"], ["geoloc", "aquatic-region", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1487365334469731864, 11192311481002475940, null, null, 948, 960, 935, 947, 179, 181, true, "Indian Ocean", "Indian Ocean"], ["sentence", "proper", 9818235231875948258, "TEXT", "#", 1.0, 10166166460142346007, 5818608339058761491, null, null, 962, 1384, 949, 1371, 182, 276, true, "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice."], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8927146464600923593, 3922788236388235307, null, null, 966, 991, 953, 978, 183, 186, true, "eighteen integral regions", "eighteen integral regions"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 10892619794174886288, 17879940029404873488, null, null, 992, 1020, 979, 1007, 186, 193, true, "(five of which are overseas)", "(five of which are overseas)"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487428509, null, null, 998, 1000, 985, 987, 188, 189, true, "of", "of"], ["verb", "compound-verb", 9818235231875948258, "TEXT", "#", 1.0, 12677082874051014734, 16862247600025167711, null, null, 1007, 1019, 994, 1006, 190, 192, true, "are overseas", "are overseas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625741117166, 4821166830861414740, null, null, 1021, 1025, 1008, 1012, 193, 194, true, "span", "span"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 14652282389360801402, 14467085604769233213, null, null, 1028, 1036, 1015, 1023, 195, 196, true, "combined", "combined"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625700779495, 4773829822730072418, null, null, 1037, 1041, 1024, 1028, 196, 197, true, "area", "area"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487435488, null, null, 1042, 1044, 1029, 1031, 197, 198, true, "of", "of"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104407715375074824, 1700623151524050233, null, null, 1045, 1052, 1032, 1039, 198, 201, true, "643,801", "643,801"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895605261, 10796893189148903013, null, null, 1053, 1056, 1040, 1043, 201, 203, true, "km2", "km2"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486414, 16516410147586311652, null, null, 1053, 1055, 1040, 1042, 201, 202, true, "km", "km"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235162, 2654033242220620585, null, null, 1055, 1056, 1042, 1043, 202, 203, true, "2", "2"], ["parenthesis", "round brackets", 9818235231875948258, "TEXT", "#", 1.0, 4906416255891308311, 1387909330414744194, null, null, 1057, 1072, 1044, 1059, 203, 210, true, "(248,573 sq mi)", "(248,573 sq mi)"], ["numval", "fval", 9818235231875948258, "TEXT", "#", 1.0, 8104408548610760820, 6463814622222040278, null, null, 1058, 1065, 1045, 1052, 204, 207, true, "248,573", "248,573"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104161639422146, 3144448772729273576, null, null, 1066, 1071, 1053, 1058, 207, 209, true, "sq mi", "sq mi"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 389609625695387621, 4868500945036381579, null, null, 1077, 1081, 1064, 1068, 211, 212, true, "have", "have"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 12541670314717034970, 703148838985843878, null, null, 1084, 1100, 1071, 1087, 213, 215, true, "total population", "total population"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487448267, null, null, 1101, 1103, 1088, 1090, 215, 216, true, "of", "of"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618865305, 4871131305966782102, null, null, 1104, 1108, 1091, 1095, 216, 217, true, "over", "over"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541481163, 16516415933924702527, null, null, 1109, 1111, 1096, 1098, 217, 218, true, "68", "68"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106464557871075584, 1700255472890257425, null, null, 1112, 1119, 1099, 1106, 218, 219, true, "million", "million"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541487053, 16516410169675354660, null, null, 1120, 1122, 1107, 1109, 219, 220, true, "as", "as"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541485670, 16516410522487455062, null, null, 1123, 1125, 1110, 1112, 220, 221, true, "of", "of"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106396157936763088, 232783200992826136, null, null, 1126, 1133, 1113, 1120, 221, 222, true, "January", "January"], ["expression", "wtoken-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 7362912214676801533, 485090574668066838, null, null, 1134, 1145, 1121, 1132, 222, 230, true, "2023.[5][8]", "2023.[5][8]"], ["numval", "year", 9818235231875948258, "TEXT", "#", 1.0, 389609625548777251, 4871157181485963100, null, null, 1134, 1138, 1121, 1125, 222, 223, true, "2023", "2023"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577901, 10796892691399633238, null, null, 1139, 1142, 1126, 1129, 224, 227, true, "[5]", "[5]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235157, 2654033131002543179, null, null, 1140, 1141, 1127, 1128, 225, 226, true, "5", "5"], ["parenthesis", "reference", 9818235231875948258, "TEXT", "#", 1.0, 12178341415895577838, 10796892702691935623, null, null, 1142, 1145, 1129, 1132, 227, 230, true, "[8]", "[8]"], ["numval", "ival", 9818235231875948258, "TEXT", "#", 1.0, 17767354399704235152, 2654033132467492508, null, null, 1143, 1144, 1130, 1131, 228, 229, true, "8", "8"], ["geoloc", "country", 9818235231875948258, "TEXT", "#", 1.0, 16381206530124097499, 2075883652949332577, null, null, 1146, 1152, 1133, 1139, 230, 231, true, "France", "France"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486535, 16516410154112448431, null, null, 1153, 1155, 1140, 1142, 231, 232, true, "is", "is"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14782540711164886662, 14111360077134393327, null, null, 1158, 1170, 1145, 1157, 233, 235, true, "unitary semi", "unitary semi"], ["expression", "word-concatenation", 9818235231875948258, "TEXT", "#", 1.0, 18068372194781726140, 2925318021227219899, null, null, 1166, 1183, 1153, 1170, 234, 237, true, "semi-presidential", "semi-presidential"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 9493572096187311884, 17586523526652496832, null, null, 1171, 1192, 1158, 1179, 236, 238, true, "presidential republic", "presidential republic"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 389609625618037948, 4871103648607633852, null, null, 1193, 1197, 1180, 1184, 238, 239, true, "with", "with"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106397824284531415, 8982419828283128022, null, null, 1202, 1209, 1189, 1196, 240, 241, true, "capital", "capital"], ["conn", "single-conn", 9818235231875948258, "TEXT", "#", 1.0, 15441160910541486538, 16516410147320877855, null, null, 1210, 1212, 1197, 1199, 241, 242, true, "in", "in"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104159094507756, 2907606697158347274, null, null, 1213, 1218, 1200, 1205, 242, 243, true, "Paris", "Paris"], ["expression", "apostrophe", 9818235231875948258, "TEXT", "#", 1.0, 14652284122026420470, 2113213664392218651, null, null, 1224, 1233, 1211, 1220, 245, 248, true, "countrys", "country's"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 8106398484406305065, 9956244646263873511, null, null, 1224, 1231, 1211, 1218, 245, 246, true, "country", "country"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 13491731564569135959, 5310634626438687925, null, null, 1232, 1246, 1219, 1233, 247, 250, true, "s largest city", "s largest city"], ["term", "enum-term-mark-1", 9818235231875948258, "TEXT", "#", 1.0, 6784284096138223592, 1541436095433469975, null, null, 1251, 1286, 1238, 1273, 251, 256, true, "main cultural and commercial centre", "main cultural and commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14010050785807764456, 6303421959957138741, null, null, 1269, 1286, 1256, 1273, 254, 256, true, "commercial centre", "commercial centre"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 334886132418797355, 3030904992914781526, null, null, 1288, 1311, 1275, 1298, 257, 261, true, "other major urban areas", "other major urban areas"], ["verb", "single-verb", 9818235231875948258, "TEXT", "#", 1.0, 8106398345764800179, 17288789034709490952, null, null, 1312, 1319, 1299, 1306, 261, 262, true, "include", "include"], ["term", "enum-term-mark-4", 9818235231875948258, "TEXT", "#", 1.0, 3362246297130503347, 10546663701406255960, null, null, 1320, 1383, 1307, 1370, 262, 275, true, "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice", "Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 6611313788482067563, 1421980926116406854, null, null, 1320, 1329, 1307, 1316, 262, 263, true, "Marseille", "Marseille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625527037691, 4878729851128794707, null, null, 1331, 1335, 1318, 1322, 264, 265, true, "Lyon", "Lyon"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14652192966284405207, 5257051565285367813, null, null, 1337, 1345, 1324, 1332, 266, 267, true, "Toulouse", "Toulouse"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 329104162140723213, 1509136076521095533, null, null, 1347, 1352, 1334, 1339, 268, 269, true, "Lille", "Lille"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 14650424510486595116, 14176630958499543186, null, null, 1354, 1362, 1341, 1349, 270, 271, true, "Bordeaux", "Bordeaux"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 1387176096815744400, 11687584650007579171, null, null, 1364, 1374, 1351, 1361, 272, 273, true, "Strasbourg", "Strasbourg"], ["term", "single-term", 9818235231875948258, "TEXT", "#", 1.0, 389609625695734419, 4868508732595360680, null, null, 1379, 1383, 1366, 1370, 274, 275, true, "Nice", "Nice"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "properties": {"data": [["language", 9818235231875948258, "TEXT", "#", "en", 0.93], ["semantic", 9818235231875948258, "TEXT", "#", "text", 0.82]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 9818235231875948258, "text": "France (French: [f\u0281\u0251\u0303s] \u24d8), officially the French Republic (French: R\u00e9publique fran\u00e7aise [\u0281epyblik f\u0281\u0251\u0303s\u025b\u02d0z]),[14] is a country located primarily in Western Europe. It also includes overseas regions and territories in the Americas and the Atlantic, Pacific and Indian oceans,[XII] giving it one of the largest discontiguous exclusive economic zones in the world. Metropolitan France shares borders with Belgium and Luxembourg to the north, Germany to the north east, Switzerland to the east, Italy and Monaco to the south east, Andorra and Spain to the south, and a maritime border with the United Kingdom to the north west. Its metropolitan area extends from the Rhine to the Atlantic Ocean and from the Mediterranean Sea to the English Channel and the North Sea; overseas territories include French Guiana in South America, Saint Pierre and Miquelon in the North Atlantic, the French West Indies, and many islands in Oceania and the Indian Ocean. Its eighteen integral regions (five of which are overseas) span a combined area of 643,801 km2 (248,573 sq mi) and have a total population of over 68 million as of January 2023.[5][8] France is a unitary semi-presidential republic with its capital in Paris, the country's largest city and main cultural and commercial centre; other major urban areas include Marseille, Lyon, Toulouse, Lille, Bordeaux, Strasbourg and Nice.", "text_hash": 13399504000106611798, "type": "text"} +{"applied_models": ["cite", "conn", "expression", "geoloc", "language", "link", "name", "numval", "parenthesis", "quote", "semantic", "sentence", "term", "verb"], "dloc": "#", "instances": {"data": [["sentence", "proper", 4522339299074192207, "TEXT", "#", 1.0, 11051047358468778372, 16543359090497504685, null, null, 0, 188, 0, 188, 0, 40, true, "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states.", "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states."], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161640368611, 252083659971879000, null, null, 3, 8, 3, 8, 1, 2, true, "study", "study"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8106398411236812386, 7848142319159848870, null, null, 13, 20, 13, 20, 3, 4, true, "effects", "effects"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570669383, null, null, 21, 23, 21, 23, 4, 5, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301128091, null, null, 24, 33, 24, 33, 5, 6, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587921185, null, null, 34, 41, 34, 41, 6, 7, true, "pairing", "pairing"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541486538, 8258590015498866268, null, null, 42, 44, 42, 44, 7, 8, true, "in", "in"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 14635108738816547137, 5602575627490325472, null, null, 45, 53, 45, 53, 8, 11, true, "two-band", "two-band"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206570221100871, 2911818818181444888, null, null, 49, 55, 49, 55, 10, 12, true, "band s", "band s"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206513162532973, 10180144108192437812, null, null, 54, 60, 54, 60, 11, 14, true, "s-wave", "s-wave"], ["term", "enum-term-mark-2", 4522339299074192207, "TEXT", "#", 1.0, 8560127426779937860, 4026994879422986240, null, null, 56, 66, 56, 66, 13, 16, true, "wave and d", "wave and d"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625633602560, 14144633872330801396, null, null, 56, 60, 56, 60, 13, 14, true, "wave", "wave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250789659, null, null, 65, 71, 65, 71, 15, 18, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 5267005535915851615, 13852357345485708038, null, null, 67, 87, 67, 87, 17, 19, true, "wave superconductors", "wave superconductors"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625618037948, 15834278012163798276, null, null, 88, 92, 88, 92, 19, 20, true, "with", "with"], ["expression", "wtoken-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 12178341415896111199, 8716494315687321109, null, null, 93, 96, 93, 96, 20, 23, true, "D4h", "D4h"], ["numval", "ival", 4522339299074192207, "TEXT", "#", 1.0, 17767354399704235156, 8513040951015345484, null, null, 94, 95, 94, 95, 21, 22, true, "4", "4"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 2516792725790519961, 10765065347046652233, null, null, 95, 105, 95, 105, 22, 24, true, "h symmetry", "h symmetry"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 8106398108997961455, 10784125725225486670, null, null, 106, 113, 106, 113, 24, 26, true, "in both", "in both"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168216057, null, null, 114, 127, 114, 127, 26, 29, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183561878, null, null, 114, 118, 114, 118, 26, 27, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1366921581602115232, 15058186165846257397, null, null, 119, 137, 119, 137, 28, 30, true, "reversal invariant", "reversal invariant"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541487053, 8258614471364991252, null, null, 146, 148, 146, 148, 32, 33, true, "as", "as"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 4977218569014515680, 16460902135168225520, null, null, 149, 162, 149, 162, 33, 36, true, "time-reversal", "time-reversal"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 389609625631241985, 14143245001183567675, null, null, 149, 153, 149, 153, 33, 34, true, "time", "time"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16155708024079339904, 14846007814114510811, null, null, 154, 171, 154, 171, 35, 37, true, "reversal symmetry", "reversal symmetry"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 14652253420366315125, 40105719221584943, null, null, 172, 180, 172, 180, 37, 38, true, "breaking", "breaking"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433885664, null, null, 181, 187, 181, 187, 38, 39, true, "states", "states"], ["sentence", "proper", 4522339299074192207, "TEXT", "#", 1.0, 1209104465871797120, 9119641206068645018, null, null, 189, 384, 189, 384, 40, 75, true, "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned.", "The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned."], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 14814125847222739835, 15458787250226893702, null, null, 193, 201, 193, 201, 41, 42, true, "presence", "presence"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485670, 8258609660570696516, null, null, 202, 204, 202, 204, 42, 43, true, "of", "of"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301123522, null, null, 205, 214, 205, 214, 43, 44, true, "interband", "interband"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 10643238567851381821, 1003183218790757917, null, null, 215, 244, 215, 244, 44, 47, true, "pairing qualitatively changes", "pairing qualitatively changes"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16508916277772113550, 9548067161217124222, null, null, 249, 264, 249, 264, 48, 50, true, "nodal structure", "nodal structure"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206565712212855, 8154557346786713941, null, null, 265, 271, 265, 271, 50, 52, true, "of the", "of the"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 15792723472797475315, 12422683164914826034, null, null, 272, 286, 272, 286, 52, 53, true, "superconductor", "superconductor"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 329104161758737773, 218549475711749511, null, null, 288, 293, 288, 293, 54, 55, true, "nodes", "nodes"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 9107359644454905795, 8505641380862264642, null, null, 298, 309, 298, 309, 56, 60, true, "(dis)appear", "(dis)appear"], ["parenthesis", "round brackets", 4522339299074192207, "TEXT", "#", 1.0, 329104053577713079, 7302082272979819201, null, null, 298, 303, 298, 303, 56, 59, true, "(dis)", "(dis)"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 12178341415895452094, 8713100074317547395, null, null, 299, 302, 299, 302, 57, 58, true, "dis", "dis"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 16381206574684919940, 8627590102959499799, null, null, 303, 309, 303, 309, 59, 60, true, "appear", "appear"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161618191043, 217789220955720825, null, null, 311, 316, 311, 316, 61, 62, true, "merge", "merge"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104161602730844, 248809633339933359, null, null, 322, 327, 322, 327, 64, 65, true, "leave", "leave"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 6103708995185994398, 7884621192383240094, null, null, 328, 341, 328, 341, 65, 68, true, "high-symmetry", "high-symmetry"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4859188827321755536, 9887725278734779219, null, null, 333, 351, 333, 351, 67, 69, true, "symmetry locations", "symmetry locations"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 4825939639025618404, 1480366004677831103, null, null, 357, 374, 357, 374, 70, 72, true, "interband pairing", "interband pairing"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14637951881113682890, 10762423736752708319, null, null, 375, 383, 375, 383, 72, 74, true, "is tuned", "is tuned"], ["sentence", "proper", 4522339299074192207, "TEXT", "#", 1.0, 6347118211199514282, 11885133783377404984, null, null, 385, 594, 385, 594, 75, 114, true, "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states."], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 16381206560518651853, 331521794076237833, null, null, 398, 404, 398, 404, 77, 79, true, "in the", "in the"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 16381206565268905073, 8176988104250764892, null, null, 405, 411, 405, 411, 79, 82, true, "d-wave", "d-wave"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 3545604367994270661, 11829255560935036292, null, null, 407, 416, 407, 416, 81, 83, true, "wave case", "wave case"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 389609625697824147, 15809696082039170992, null, null, 421, 425, 421, 425, 85, 86, true, "find", "find"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 389609625631229034, 14143246580477546901, null, null, 426, 430, 426, 430, 86, 87, true, "that", "that"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 17949534967191918052, 13667336492915616319, null, null, 440, 454, 440, 454, 89, 91, true, "boundary modes", "boundary modes"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 14639749323101624317, 11329625370881090518, null, null, 455, 475, 455, 475, 91, 93, true, "change qualitatively", "change qualitatively"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182600923963915812, 15426515132301159541, null, null, 481, 490, 481, 490, 94, 95, true, "interband", "interband"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106476000544865536, 2825689308587890817, null, null, 491, 498, 491, 498, 95, 96, true, "pairing", "pairing"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 6182652534064064130, 847509291286503975, null, null, 499, 508, 499, 508, 96, 97, true, "increases", "increases"], ["expression", "word-concatenation", 4522339299074192207, "TEXT", "#", 1.0, 7851032859986104784, 2684482694186442329, null, null, 515, 526, 515, 526, 99, 102, true, "zero-energy", "zero-energy"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 1885602650026083434, 12476719833465444023, null, null, 520, 534, 520, 534, 101, 103, true, "energy Andreev", "energy Andreev"], ["verb", "single-verb", 4522339299074192207, "TEXT", "#", 1.0, 329104159325585799, 66191664906118763, null, null, 535, 540, 535, 540, 103, 104, true, "bound", "bound"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 16381206579012822138, 8532356352433796974, null, null, 541, 547, 541, 547, 104, 105, true, "states", "states"], ["verb", "compound-verb", 4522339299074192207, "TEXT", "#", 1.0, 8106397415916477158, 11270396245667704043, null, null, 548, 555, 548, 555, 105, 107, true, "gap out", "gap out"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 8619280147136806734, 6523932076535307667, null, null, 560, 570, 560, 570, 108, 109, true, "transition", "transition"], ["conn", "single-conn", 4522339299074192207, "TEXT", "#", 1.0, 15441160910541485865, 8258609461978936708, null, null, 571, 573, 571, 573, 109, 110, true, "to", "to"], ["term", "single-term", 4522339299074192207, "TEXT", "#", 1.0, 7379047809796703983, 4636803571796194289, null, null, 574, 593, 574, 593, 110, 113, true, "helical edge states", "helical edge states"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "properties": {"data": [["language", 4522339299074192207, "TEXT", "#", "en", 0.87], ["semantic", 4522339299074192207, "TEXT", "#", "text", 0.97]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 4522339299074192207, "text": "We study the effects of interband pairing in two-band s-wave and d-wave superconductors with D4h symmetry in both time-reversal invariant as well as time-reversal symmetry breaking states. The presence of interband pairing qualitatively changes the nodal structure of the superconductor: nodes can (dis)appear, merge, and leave high-symmetry locations when interband pairing is tuned. Furthermore, in the d-wave case, we find that also the boundary modes change qualitatively when interband pairing increases: flat zero-energy Andreev bound states gap out and transition to helical edge states.", "text_hash": 7455828584320671675, "type": "text"} diff --git a/tests/data/texts/test_02A_text_01.jsonl b/tests/data/texts/test_02A_text_01.jsonl index 91bccab4..c535d49e 100644 --- a/tests/data/texts/test_02A_text_01.jsonl +++ b/tests/data/texts/test_02A_text_01.jsonl @@ -1 +1 @@ -{"applied-models": ["cite", "expression", "language", "lapos", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "", "hash": 253473544312511038, "instances": {"data": [["sentence", "", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, 18446744073709551615, 18446744073709551615, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, 18446744073709551615, 18446744073709551615, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, 18446744073709551615, 18446744073709551615, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.5799999833106995]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"applied_models": ["cite", "expression", "language", "link", "name", "numval", "parenthesis", "quote", "sentence", "term"], "dloc": "#", "instances": {"data": [["sentence", "proper", 253473544312511038, "TEXT", "#", 1.0, 3797235776056707210, 5485615449497097804, null, null, 0, 19, 0, 19, 0, 5, true, "FeSe is a material.", "FeSe is a material."], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 389609625538333940, 12313472961580748193, null, null, 0, 4, 0, 4, 0, 1, true, "FeSe", "FeSe"], ["term", "single-term", 253473544312511038, "TEXT", "#", 1.0, 14638289344044595472, 9648006590287322806, null, null, 10, 18, 10, 18, 3, 4, true, "material", "material"]], "headers": ["type", "subtype", "subj_hash", "subj_name", "subj_path", "conf", "hash", "ihash", "coor_i", "coor_j", "char_i", "char_j", "ctok_i", "ctok_j", "wtok_i", "wtok_j", "wtok-match", "name", "original"]}, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text_hash": 3797235776056707210, "type": "text"} diff --git a/tests/data/texts/test_02B_text_01.jsonl b/tests/data/texts/test_02B_text_01.jsonl index 0bc897d5..f472c0eb 100644 --- a/tests/data/texts/test_02B_text_01.jsonl +++ b/tests/data/texts/test_02B_text_01.jsonl @@ -1 +1 @@ -{"dloc": "", "hash": 253473544312511038, "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", "en", 0.5799999833106995]], "headers": ["type", "label", "confidence"]}, "prov": [], "text": "FeSe is a material.", "text-hash": 3797235776056707210, "type": "text"} +{"dloc": "#", "model-application": {"message": "success", "success": true}, "orig": "FeSe is a material.", "properties": {"data": [["language", 253473544312511038, "TEXT", "#", "en", 0.58]], "headers": ["type", "subj_hash", "subj_name", "subj_path", "label", "confidence"]}, "prov": [], "sref": "#", "subj_hash": 253473544312511038, "text": "FeSe is a material.", "text_hash": 3797235776056707210, "type": "text"} diff --git a/tests/test_glm.py b/tests/test_glm.py index 60c223a1..3dbbbfb6 100644 --- a/tests/test_glm.py +++ b/tests/test_glm.py @@ -31,7 +31,7 @@ def test_02A_create_glm_from_doc(): else: rdir = os.path.join(sdir, "glm_ref") odir = os.path.join(sdir, "glm_out") - + model_names = "semantic;name;verb;term;abbreviation" json_files = glob.glob(os.path.join(sdir, "docs/*.json")) diff --git a/tests/test_nlp.py b/tests/test_nlp.py index 771671ad..00accc53 100644 --- a/tests/test_nlp.py +++ b/tests/test_nlp.py @@ -1,8 +1,12 @@ #!/usr/bin/env python +GENERATE=False + import os import json +from tabulate import tabulate + from deepsearch_glm.nlp_utils import list_nlp_model_configs, init_nlp_model, \ extract_references_from_doc from deepsearch_glm.utils.load_pretrained_models import load_pretrained_nlp_models @@ -10,16 +14,11 @@ from deepsearch_glm.nlp_train_semantic import train_semantic -GENERATE=False - -def test_01_load_nlp_models(): - models = load_pretrained_nlp_models() - #print(f"models: {models}") - - assert "language" in models - assert "semantic" in models - assert "name" in models - assert "reference" in models +def round_floats(o): + if isinstance(o, float): return round(o, 2) + if isinstance(o, dict): return {k: round_floats(v) for k, v in o.items()} + if isinstance(o, (list, tuple)): return [round_floats(x) for x in o] + return o def check_dimensions(item): @@ -30,14 +29,37 @@ def check_dimensions(item): for row in item["data"]: assert len(row)==len(headers) -def test_02A_run_nlp_models_on_text(): +def get_reduced_instances(instances): + + headers = instances["headers"] + + table=[] + for row in instances["data"]: + if "reference" in row[0] and "texts" in row[4]: + table.append([row[0], row[1], row[4], row[5], row[-2]]) + + return table, [headers[0], headers[1], headers[4], headers[5], headers[-2]] + +def test_01_load_nlp_models(): + models = load_pretrained_nlp_models() + #print(f"models: {models}") + + assert "language" in models + assert "semantic" in models + assert "name" in models + assert "reference" in models + +# _run_nlp_models_on_text(): +def test_02A(): source = "./tests/data/texts/test_02A_text_01.jsonl" target = source model = init_nlp_model("sentence;language;term") + sres = model.apply_on_text("FeSe is a material.") - + sres = round_floats(sres) + if GENERATE: # generate the test-data fw = open(source, "w") @@ -50,7 +72,8 @@ def test_02A_run_nlp_models_on_text(): with open(target) as fr: tres = json.load(fr) - + tres = round_floats(tres) + for label in ["properties", "instances"]: check_dimensions(sres[label]) assert label in sres @@ -58,9 +81,13 @@ def test_02A_run_nlp_models_on_text(): for label in ["relations"]: assert label not in sres - assert tres==sres + #print(tres["properties"]) + #print(sres["properties"]) -def test_02B_run_nlp_models_on_text(): + assert tres==sres + +# _run_nlp_models_on_text(): +def test_02B(): source = "./tests/data/texts/test_02B_text_01.jsonl" target = source @@ -68,8 +95,10 @@ def test_02B_run_nlp_models_on_text(): filters = ["properties"] model = init_nlp_model("sentence;language;term", filters) - sres = model.apply_on_text("FeSe is a material.") + sres = model.apply_on_text("FeSe is a material.") + sres = round_floats(sres) + if GENERATE: # generate the test-data fw = open(source, "w") @@ -82,7 +111,8 @@ def test_02B_run_nlp_models_on_text(): with open(target) as fr: tres = json.load(fr) - + tres = round_floats(tres) + for label in ["text", "properties"]: assert label in sres @@ -91,15 +121,17 @@ def test_02B_run_nlp_models_on_text(): assert tres==sres -def test_03A_run_nlp_models_on_document(): +# _run_nlp_models_on_document(): +def test_03A(): with open("./tests/data/docs/1806.02284.json") as fr: doc = json.load(fr) model = init_nlp_model("sentence;language;term;reference;abbreviation") - res = model.apply_on_doc(doc) - #print(res.keys()) + res = model.apply_on_doc(doc) + res = round_floats(res) + for label in ["description", "body", "meta", "page-elements", "texts", "tables", "figures", "properties", "instances", "relations"]: @@ -108,19 +140,21 @@ def test_03A_run_nlp_models_on_document(): check_dimensions(res["properties"]) check_dimensions(res["instances"]) check_dimensions(res["relations"]) - -def test_03B_run_nlp_models_on_document(): + +# _run_nlp_models_on_document(): +def test_03B(): with open("./tests/data/docs/1806.02284.json") as fr: doc = json.load(fr) - filters = ["applied-models", "properties"] + filters = ["applied_models", "properties"] model = init_nlp_model("sentence;language;term;reference", filters) + res = model.apply_on_doc(doc) - #print(res.keys()) + res = round_floats(res) - for label in ["dloc", "applied-models", + for label in ["dloc", "applied_models", "description", "body", "meta", "page-elements", "texts", "tables", "figures", "properties"]: @@ -131,7 +165,8 @@ def test_03B_run_nlp_models_on_document(): check_dimensions(res["properties"]) -def test_03C_run_nlp_models_on_document(): +#_run_nlp_models_on_document(): +def test_03C(): model = init_nlp_model("language;semantic;sentence;term;verb;conn;geoloc;reference") @@ -143,7 +178,9 @@ def test_03C_run_nlp_models_on_document(): doc = json.load(fr) res = model.apply_on_doc(doc) - extract_references_from_doc(res) + res = round_floats(res) + + #extract_references_from_doc(res) fw = open(target, "w") fw.write(json.dumps(res, indent=2)+"\n") @@ -156,13 +193,108 @@ def test_03C_run_nlp_models_on_document(): sdoc = json.load(fr) res = model.apply_on_doc(sdoc) + res = round_floats(res) with open(target) as fr: tdoc = json.load(fr) + tdoc = round_floats(tdoc) + + assert res==tdoc + +# run_nlp_models_on_document(): +def test_03D(): + + model_i = init_nlp_model("term") + model_j = init_nlp_model("reference") + #model_j = init_nlp_model("verb") + + model_k = init_nlp_model("term;reference") + #model_k = init_nlp_model("term;verb") + + source = "./tests/data/docs/1806.02284.json" + target_i = "./tests/data/docs/1806.02284.nlp.i.json" + target_j = "./tests/data/docs/1806.02284.nlp.j.json" + target_k = "./tests/data/docs/1806.02284.nlp.k.json" + + if True: # generate the test-data + with open(source) as fr: + doc = json.load(fr) + + #print("apply model_i") + res_i = model_i.apply_on_doc(doc) + #res_i = round_floats(res_i) + + fw = open(target_i, "w") + fw.write(json.dumps(res_i, indent=2)+"\n") + fw.close() + + #print("apply model_j") + res_j = model_j.apply_on_doc(res_i) + #res_j = model_j.apply_on_doc(doc) + res_j = round_floats(res_j) + + fw = open(target_j, "w") + fw.write(json.dumps(res_j, indent=2)+"\n") + fw.close() + + #print("apply model_k") + res_k = model_k.apply_on_doc(doc) + res_k = round_floats(res_k) + + fw = open(target_k, "w") + fw.write(json.dumps(res_k, indent=2)+"\n") + fw.close() + + assert res_j["tables"]==res_k["tables"] + + """ + print(tabulate(res_j["properties"]["data"][0:30], + headers=res_j["properties"]["headers"])) + print(tabulate(res_k["properties"]["data"][0:30], + headers=res_k["properties"]["headers"])) + """ + + assert len(res_j["properties"]["data"])==len(res_k["properties"]["data"]) + assert res_j["properties"]["data"]==res_k["properties"]["data"] + + table_i, headers_i = get_reduced_instances(res_i["instances"]) + table_j, headers_j = get_reduced_instances(res_j["instances"]) + table_k, headers_k = get_reduced_instances(res_k["instances"]) + + #print(tabulate(table_j, headers=headers_j)) + #print(tabulate(table_k, headers=headers_k)) + + """ + print("#-inst-i: ", len(table_i)) + print("#-inst-j: ", len(table_j)) + print("#-inst-k: ", len(table_k)) + """ + assert table_j==table_k + + #print("#-instances-j: ", len(res_j["instances"]["data"])) + #print("#-instances-j: ", len(res_k["instances"]["data"])) + + assert len(res_j["instances"]["data"])==len(res_k["instances"]["data"]) + assert res_j["instances"]["data"]==res_k["instances"]["data"] + + assert res_j==res_k + + else: + with open(source) as fr: + sdoc = json.load(fr) + + res = model.apply_on_doc(sdoc) + res = round_floats(res) + + with open(target) as fr: + tdoc = json.load(fr) + tdoc = round_floats(tdoc) + assert res==tdoc -def test_04A_terms(): +# test term model +def test_04A(): source = "./tests/data/texts/terms.jsonl" target = "./tests/data/texts/terms.nlp.jsonl" @@ -177,8 +309,11 @@ def test_04A_terms(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) - + res = round_floats(res) + fw.write(json.dumps(res)+"\n") fw.close() @@ -189,25 +324,27 @@ def test_04A_terms(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) + """ for i,row_i in enumerate(res["properties"]["data"]): row_j = data["properties"]["data"][i] - #print(i, "\t", row_i) - #print(i, "\t", row_j) assert row_i==row_j for i,row_i in enumerate(res["instances"]["data"]): row_j = data["instances"]["data"][i] - #print(i, "\t", row_i) - #print(i, "\t", row_j) assert row_i==row_j - + """ + assert res==data assert True -def test_04B_semantic(): +# test semantic classifier +def test_04B(): model = init_nlp_model("semantic") @@ -223,7 +360,10 @@ def test_04B_semantic(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) fw.write(json.dumps(res)+"\n") @@ -236,15 +376,21 @@ def test_04B_semantic(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) + """ for i,row_i in enumerate(res["properties"]["data"]): row_j = data["properties"]["data"][i] assert row_i==row_j - + """ + assert res==data -def test_04C_references(): +# test reference model +def test_04C(): model = init_nlp_model("reference") @@ -260,7 +406,10 @@ def test_04C_references(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) + res = round_floats(res) fw.write(json.dumps(res)+"\n") @@ -273,45 +422,55 @@ def test_04C_references(): for line in lines: data = json.loads(line) + data = round_floats(data) + res = model.apply_on_text(data["text"]) - + res = round_floats(res) + assert res==data -def test_05_to_legacy(): +def test_05A(): - model = init_nlp_model("reference") + model = init_nlp_model("reference;term") source = "./tests/data/docs/doc_01.old.json" target_leg = "./tests/data/docs/doc_01.leg.json" target_nlp = "./tests/data/docs/doc_01.nlp.json" - print(f"reading {source} ... ", end="") + #print(f"reading {source} ... ", end="") with open(source, "r") as fr: doc_i = json.load(fr) if GENERATE: doc_j = model.apply_on_doc(doc_i) - + doc_j = round_floats(doc_j) + with open(target_nlp, "w") as fw: fw.write(json.dumps(doc_j, indent=2)) + """ doc_i = to_legacy_document_format(doc_j, doc_i) with open(target_leg, "w") as fw: fw.write(json.dumps(doc_i, indent=2)) + """ else: with open(target_nlp, "r") as fr: doc_nlp = json.load(fr) - + doc_nlp = round_floats(doc_nlp) + with open(target_leg, "r") as fr: doc_leg = json.load(fr) - + doc_leg = round_floats(doc_leg) + doc_j = model.apply_on_doc(doc_i) - + doc_j = round_floats(doc_j) + assert doc_j==doc_nlp doc_i = to_legacy_document_format(doc_j, doc_i) + doc_i = round_floats(doc_i) assert doc_i==doc_leg